ARIMA
--- Import libraries for ARIMA----¶
In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from pmdarima import auto_arima
import warnings
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from pmdarima import auto_arima
import warnings
warnings.filterwarnings('ignore')
# Load and preprocess data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
print(f"Original data length: {len(data)}")
# --- Step 1: Train/Test Split ---
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
train_data = data[:train_size]
val_data = data[train_size:train_size+val_size]
test_data = data[train_size+val_size:]
print(f"Train size: {len(train_data)}")
print(f"Validation size: {len(val_data)}")
print(f"Test size: {len(test_data)}")
# --- Step 2: Stationarity Check ---
def check_stationarity(series):
"""Check if series is stationary using ADF test"""
result = adfuller(series)
print(f"ADF Statistic: {result[0]:.4f}")
print(f"p-value: {result[1]:.4f}")
print("Critical Values:")
for key, value in result[4].items():
print(f" {key}: {value:.4f}")
return result[1] < 0.05 # Return True if stationary
print("\nChecking stationarity of training data...")
is_stationary = check_stationarity(train_data)
print(f"Data is {'stationary' if is_stationary else 'non-stationary'}")
# --- Step 3: Auto ARIMA for Optimal Parameters ---
print("\n" + "="*50)
print("AUTO ARIMA PARAMETER SEARCH")
print("="*50)
# Auto ARIMA for optimal parameters
auto_arima_model = auto_arima(
train_data,
seasonal=False, # Non-seasonal ARIMA
stepwise=True,
suppress_warnings=True,
error_action='ignore',
trace=True,
information_criterion='aic'
)
print(f"Optimal ARIMA order: {auto_arima_model.order}")
# --- Step 4: Build and Train ARIMA Model ---
print("\n" + "="*50)
print("ARIMA MODEL TRAINING")
print("="*50)
arima_model = ARIMA(train_data, order=auto_arima_model.order)
arima_result = arima_model.fit()
print("\nARIMA MODEL SUMMARY")
print("="*50)
print(arima_result.summary())
# ARIMA forecasting
arima_forecast = arima_result.forecast(steps=len(val_data))
arima_val_pred = arima_forecast
# --- Step 5: Auto SARIMA for Optimal Parameters ---
print("\n" + "="*50)
print("AUTO SARIMA PARAMETER SEARCH")
print("="*50)
# Auto SARIMA for optimal parameters (with seasonal component)
auto_sarima_model = auto_arima(
train_data,
seasonal=True,
m=26, # Weekly data with yearly seasonality
stepwise=True,
suppress_warnings=True,
error_action='ignore',
trace=True,
information_criterion='aic'
)
print(f"Optimal SARIMA order: {auto_sarima_model.order}")
print(f"Optimal Seasonal order: {auto_sarima_model.seasonal_order}")
# --- Step 6: Build and Train SARIMA Model ---
print("\n" + "="*50)
print("SARIMA MODEL TRAINING")
print("="*50)
sarima_model = SARIMAX(
train_data,
order=auto_sarima_model.order,
seasonal_order=auto_sarima_model.seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False
)
sarima_result = sarima_model.fit(disp=False)
print("\nSARIMA MODEL SUMMARY")
print("="*50)
print(sarima_result.summary())
# SARIMA forecasting
sarima_forecast = sarima_result.forecast(steps=len(val_data))
sarima_val_pred = sarima_forecast
# --- Step 7: Model Evaluation on Validation Set ---
def evaluate_model(actual, predicted, model_name):
"""Comprehensive model evaluation"""
mse = mean_squared_error(actual, predicted)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, predicted)
mape = mean_absolute_percentage_error(actual, predicted)
r2 = r2_score(actual, predicted)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
predicted_diff = np.sign(np.diff(predicted))
da = (np.sum(actual_diff == predicted_diff) / len(actual_diff)) * 100
print(f"\n{model_name} VALIDATION PERFORMANCE:")
print("="*40)
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"R²: {r2:.4f}")
print(f"Directional Accuracy: {da:.2f}%")
return {
'MSE': mse, 'RMSE': rmse, 'MAE': mae,
'MAPE': mape, 'R²': r2, 'DA': da
}
# Evaluate both models on validation set
arima_metrics = evaluate_model(val_data, arima_val_pred, "ARIMA")
sarima_metrics = evaluate_model(val_data, sarima_val_pred, "SARIMA")
# --- Step 8: Final Model Selection and Test Forecasting ---
# Select the best model based on validation performance
if arima_metrics['RMSE'] < sarima_metrics['RMSE']:
print("\nSELECTED BEST MODEL: ARIMA")
best_model = arima_result
best_model_name = "ARIMA"
best_order = auto_arima_model.order
else:
print("\nSELECTED BEST MODEL: SARIMA")
best_model = sarima_result
best_model_name = "SARIMA"
best_order = f"{auto_sarima_model.order}{auto_sarima_model.seasonal_order}"
# Retrain best model on train + validation data
final_train_data = np.concatenate([train_data, val_data])
if best_model_name == "ARIMA":
final_model = ARIMA(final_train_data, order=best_order)
final_result = final_model.fit()
else:
final_model = SARIMAX(
final_train_data,
order=auto_sarima_model.order,
seasonal_order=auto_sarima_model.seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False
)
final_result = final_model.fit(disp=False)
# Final forecasting on test set
test_forecast = final_result.forecast(steps=len(test_data))
# --- Step 9: Final Evaluation on Test Set ---
final_metrics = evaluate_model(test_data, test_forecast, f"FINAL {best_model_name}")
# --- Step 10: Visualization ---
# Create date indices for plotting
train_dates = df.index[:train_size]
val_dates = df.index[train_size:train_size+val_size]
test_dates = df.index[train_size+val_size:train_size+val_size+test_size]
# Plot 1: ARIMA Results
plt.figure(figsize=(12, 8))
plt.plot(train_dates, train_data, label='Train Data', color='blue', alpha=0.7)
plt.plot(val_dates, val_data, label='Actual Validation', color='green', linewidth=2)
plt.plot(val_dates, arima_val_pred, label='ARIMA Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(val_dates[0], color='gray', linestyle='--', label='Validation Start')
plt.title(f'ARIMA Model: Order {auto_arima_model.order}')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/arima_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: SARIMA Results
plt.figure(figsize=(12, 8))
plt.plot(train_dates, train_data, label='Train Data', color='blue', alpha=0.7)
plt.plot(val_dates, val_data, label='Actual Validation', color='green', linewidth=2)
plt.plot(val_dates, sarima_val_pred, label='SARIMA Forecast', color='orange', linestyle='--', linewidth=2)
plt.axvline(val_dates[0], color='gray', linestyle='--', label='Validation Start')
plt.title(f'SARIMA Model: Order {auto_sarima_model.order}{auto_sarima_model.seasonal_order}')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Final Test Results
plt.figure(figsize=(12, 8))
plt.plot(df.index[:train_size+val_size], final_train_data, label='Train+Validation', color='blue', alpha=0.7)
plt.plot(test_dates, test_data, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, test_forecast, label=f'{best_model_name} Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title(f'Final {best_model_name} Model: Test Performance')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 4: Separate Actual vs Predicted (Test)
plt.figure(figsize=(12, 8))
plt.plot(test_dates, test_data, label='Actual Test', color='blue', linewidth=2)
plt.plot(test_dates, test_forecast, label=f'{best_model_name} Predicted', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates,
test_forecast - final_metrics['RMSE'],
test_forecast + final_metrics['RMSE'],
alpha=0.2, color='red', label='± RMSE')
plt.title(f'Actual vs Predicted - {best_model_name} (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 5: Model Comparison
plt.figure(figsize=(12, 8))
models = ['ARIMA', 'SARIMA']
rmse_values = [arima_metrics['RMSE'], sarima_metrics['RMSE']]
mae_values = [arima_metrics['MAE'], sarima_metrics['MAE']]
x = np.arange(len(models))
width = 0.35
plt.bar(x - width/2, rmse_values, width, label='RMSE', alpha=0.8)
plt.bar(x + width/2, mae_values, width, label='MAE', alpha=0.8)
plt.xlabel('Models')
plt.ylabel('Error Values')
plt.title('Model Comparison (Validation Set)')
plt.xticks(x, models)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result5.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 6: Residuals Analysis
residuals = test_data - test_forecast
plt.figure(figsize=(12, 8))
plt.plot(test_dates, residuals, color='purple', alpha=0.7)
plt.axhline(0, color='red', linestyle='--', linewidth=2)
plt.title(f'{best_model_name} Model Residuals (Test Set)')
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result6.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 11: Future Forecasting ---
print("\n" + "="*50)
print("FUTURE FORECASTING")
print("="*50)
# Forecast next 12 periods
future_steps = 12
future_forecast = final_result.forecast(steps=len(test_data) + future_steps)
future_forecast = future_forecast[-future_steps:] # Get only future values
# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=future_steps, freq='W')
print(f"Future Forecast (Next {future_steps} weeks):")
print("-" * 40)
for i, (date, price) in enumerate(zip(future_dates, future_forecast), 1):
print(f"Week {i:2d} ({date.strftime('%Y-%m-%d')}): {price:.2f} Rs./kg")
# Plot future forecast
plt.figure(figsize=(12, 6))
# Plot historical data
historical_dates = df.index[-100:] # Last 100 points
historical_data = data[-100:]
plt.plot(historical_dates, historical_data, label='Historical Data', color='blue', linewidth=2)
# Plot future forecast
plt.plot(future_dates, future_forecast, label='Future Forecast', color='red', linestyle='--', linewidth=2)
plt.fill_between(future_dates,
future_forecast - final_metrics['RMSE'],
future_forecast + final_metrics['RMSE'],
alpha=0.2, color='red', label='Uncertainty Band')
plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title(f'{best_model_name} Future Price Forecast (Next {future_steps} weeks)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result7.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 12: Model Diagnostics ---
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)
# Plot diagnostics for the best model
if best_model_name == "ARIMA":
print("ARIMA Model Diagnostics:")
# ARIMA diagnostics
residuals = final_result.resid
plt.figure(figsize=(12, 8))
plt.plot(residuals)
plt.title('ARIMA Model Residuals')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result8.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 8))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.title('Residual Distribution')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result9.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 8))
plt.acorr(residuals, maxlags=20)
plt.title('Residual Autocorrelation')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result11.png", dpi=300, bbox_inches='tight')
plt.show()
else:
print("SARIMA Model Diagnostics:")
# Plot SARIMA diagnostics
final_result.plot_diagnostics(figsize=(12, 8))
plt.suptitle('SARIMA Model Diagnostics', y=1.02)
plt.tight_layout()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esarima_result12.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Final Summary ---
print("\n" + "="*60)
print("FINAL SUMMARY")
print("="*60)
print(f"Best Model: {best_model_name}")
print(f"Model Order: {best_order}")
print(f"Test RMSE: {final_metrics['RMSE']:.4f}")
print(f"Test MAE: {final_metrics['MAE']:.4f}")
print(f"Test MAPE: {final_metrics['MAPE']:.2f}%")
print(f"Test R²: {final_metrics['R²']:.4f}")
print(f"Directional Accuracy: {final_metrics['DA']:.2f}%")
print("\nKey Insights:")
print("- ARIMA models are simpler and faster to train")
print("- SARIMA models capture seasonal patterns better")
print("- The best model was selected based on validation performance")
print("- Future forecasts include uncertainty bands based on RMSE")
Original data length: 722
Train size: 505
Validation size: 108
Test size: 109
Checking stationarity of training data...
ADF Statistic: -1.3335
p-value: 0.6137
Critical Values:
1%: -3.4439
5%: -2.8675
10%: -2.5699
Data is non-stationary
==================================================
AUTO ARIMA PARAMETER SEARCH
==================================================
Performing stepwise search to minimize aic
ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=6599.863, Time=0.60 sec
ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=6644.336, Time=0.02 sec
ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=6630.207, Time=0.05 sec
ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=6627.971, Time=0.06 sec
ARIMA(0,1,0)(0,0,0)[0] : AIC=6642.340, Time=0.02 sec
ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=6604.065, Time=0.49 sec
ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=6604.049, Time=0.49 sec
ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=6598.494, Time=0.70 sec
ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=6597.054, Time=0.69 sec
ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=6614.705, Time=0.09 sec
ARIMA(4,1,1)(0,0,0)[0] intercept : AIC=6598.722, Time=0.72 sec
ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=6631.489, Time=0.06 sec
ARIMA(4,1,0)(0,0,0)[0] intercept : AIC=6613.001, Time=0.11 sec
ARIMA(4,1,2)(0,0,0)[0] intercept : AIC=6599.037, Time=1.09 sec
ARIMA(3,1,1)(0,0,0)[0] : AIC=6595.176, Time=0.33 sec
ARIMA(2,1,1)(0,0,0)[0] : AIC=6602.227, Time=0.27 sec
ARIMA(3,1,0)(0,0,0)[0] : AIC=6612.716, Time=0.06 sec
ARIMA(4,1,1)(0,0,0)[0] : AIC=6596.852, Time=0.40 sec
ARIMA(3,1,2)(0,0,0)[0] : AIC=6596.627, Time=0.42 sec
ARIMA(2,1,0)(0,0,0)[0] : AIC=6629.496, Time=0.03 sec
ARIMA(2,1,2)(0,0,0)[0] : AIC=6598.059, Time=0.31 sec
ARIMA(4,1,0)(0,0,0)[0] : AIC=6611.015, Time=0.07 sec
ARIMA(4,1,2)(0,0,0)[0] : AIC=6597.167, Time=0.58 sec
Best model: ARIMA(3,1,1)(0,0,0)[0]
Total fit time: 7.696 seconds
Optimal ARIMA order: (3, 1, 1)
==================================================
ARIMA MODEL TRAINING
==================================================
ARIMA MODEL SUMMARY
==================================================
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 505
Model: ARIMA(3, 1, 1) Log Likelihood -3292.588
Date: Wed, 05 Nov 2025 AIC 6595.176
Time: 22:53:13 BIC 6616.288
Sample: 0 HQIC 6603.457
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.5462 0.032 17.318 0.000 0.484 0.608
ar.L2 0.0657 0.048 1.379 0.168 -0.028 0.159
ar.L3 -0.1467 0.039 -3.720 0.000 -0.224 -0.069
ma.L1 -0.7956 0.032 -24.714 0.000 -0.859 -0.733
sigma2 2.765e+04 483.915 57.144 0.000 2.67e+04 2.86e+04
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 43865.21
Prob(Q): 0.95 Prob(JB): 0.00
Heteroskedasticity (H): 19.12 Skew: 0.63
Prob(H) (two-sided): 0.00 Kurtosis: 48.69
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
==================================================
AUTO SARIMA PARAMETER SEARCH
==================================================
Performing stepwise search to minimize aic
ARIMA(2,1,2)(1,0,1)[26] intercept : AIC=6598.001, Time=7.82 sec
ARIMA(0,1,0)(0,0,0)[26] intercept : AIC=6644.336, Time=0.02 sec
ARIMA(1,1,0)(1,0,0)[26] intercept : AIC=6626.490, Time=0.58 sec
ARIMA(0,1,1)(0,0,1)[26] intercept : AIC=6624.707, Time=1.63 sec
ARIMA(0,1,0)(0,0,0)[26] : AIC=6642.340, Time=0.02 sec
ARIMA(2,1,2)(0,0,1)[26] intercept : AIC=6596.011, Time=5.74 sec
ARIMA(2,1,2)(0,0,0)[26] intercept : AIC=6599.863, Time=0.56 sec
ARIMA(2,1,2)(0,0,2)[26] intercept : AIC=6597.998, Time=12.40 sec
ARIMA(2,1,2)(1,0,0)[26] intercept : AIC=6596.125, Time=6.48 sec
ARIMA(2,1,2)(1,0,2)[26] intercept : AIC=6599.995, Time=16.13 sec
ARIMA(1,1,2)(0,0,1)[26] intercept : AIC=6600.151, Time=4.46 sec
ARIMA(2,1,1)(0,0,1)[26] intercept : AIC=6600.150, Time=5.43 sec
ARIMA(3,1,2)(0,0,1)[26] intercept : AIC=6593.821, Time=7.47 sec
ARIMA(3,1,2)(0,0,0)[26] intercept : AIC=6598.494, Time=0.66 sec
ARIMA(3,1,2)(1,0,1)[26] intercept : AIC=6596.054, Time=7.80 sec
ARIMA(3,1,2)(0,0,2)[26] intercept : AIC=6595.598, Time=14.96 sec
ARIMA(3,1,2)(1,0,0)[26] intercept : AIC=6593.826, Time=7.24 sec
ARIMA(3,1,2)(1,0,2)[26] intercept : AIC=6597.592, Time=17.22 sec
ARIMA(3,1,1)(0,0,1)[26] intercept : AIC=6592.376, Time=5.06 sec
ARIMA(3,1,1)(0,0,0)[26] intercept : AIC=6597.054, Time=0.65 sec
ARIMA(3,1,1)(1,0,1)[26] intercept : AIC=6594.571, Time=7.13 sec
ARIMA(3,1,1)(0,0,2)[26] intercept : AIC=6594.334, Time=12.58 sec
ARIMA(3,1,1)(1,0,0)[26] intercept : AIC=6592.563, Time=4.84 sec
ARIMA(3,1,1)(1,0,2)[26] intercept : AIC=6596.320, Time=14.69 sec
ARIMA(3,1,0)(0,0,1)[26] intercept : AIC=6610.714, Time=0.66 sec
ARIMA(4,1,1)(0,0,1)[26] intercept : AIC=6593.986, Time=6.41 sec
ARIMA(2,1,0)(0,0,1)[26] intercept : AIC=6628.005, Time=0.60 sec
ARIMA(4,1,0)(0,0,1)[26] intercept : AIC=6609.412, Time=2.13 sec
ARIMA(4,1,2)(0,0,1)[26] intercept : AIC=6594.248, Time=6.99 sec
ARIMA(3,1,1)(0,0,1)[26] : AIC=6590.704, Time=2.05 sec
ARIMA(3,1,1)(0,0,0)[26] : AIC=6595.176, Time=0.30 sec
ARIMA(3,1,1)(1,0,1)[26] : AIC=6592.682, Time=3.51 sec
ARIMA(3,1,1)(0,0,2)[26] : AIC=6592.680, Time=5.61 sec
ARIMA(3,1,1)(1,0,0)[26] : AIC=6590.857, Time=1.51 sec
ARIMA(3,1,1)(1,0,2)[26] : AIC=6594.677, Time=8.03 sec
ARIMA(2,1,1)(0,0,1)[26] : AIC=6598.543, Time=2.69 sec
ARIMA(3,1,0)(0,0,1)[26] : AIC=6608.758, Time=0.41 sec
ARIMA(4,1,1)(0,0,1)[26] : AIC=6592.328, Time=2.25 sec
ARIMA(3,1,2)(0,0,1)[26] : AIC=6591.988, Time=2.66 sec
ARIMA(2,1,0)(0,0,1)[26] : AIC=6626.029, Time=0.32 sec
ARIMA(2,1,2)(0,0,1)[26] : AIC=6594.428, Time=2.21 sec
ARIMA(4,1,0)(0,0,1)[26] : AIC=6607.469, Time=1.23 sec
ARIMA(4,1,2)(0,0,1)[26] : AIC=6592.588, Time=3.70 sec
Best model: ARIMA(3,1,1)(0,0,1)[26]
Total fit time: 214.903 seconds
Optimal SARIMA order: (3, 1, 1)
Optimal Seasonal order: (0, 0, 1, 26)
==================================================
SARIMA MODEL TRAINING
==================================================
SARIMA MODEL SUMMARY
==================================================
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 505
Model: SARIMAX(3, 1, 1)x(0, 0, 1, 26) Log Likelihood -3114.950
Date: Wed, 05 Nov 2025 AIC 6241.900
Time: 22:56:50 BIC 6266.893
Sample: 0 HQIC 6251.728
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.5672 0.028 20.138 0.000 0.512 0.622
ar.L2 0.0648 0.048 1.342 0.180 -0.030 0.160
ar.L3 -0.1593 0.041 -3.911 0.000 -0.239 -0.079
ma.L1 -0.8109 0.028 -29.161 0.000 -0.865 -0.756
ma.S.L26 -0.1196 0.018 -6.719 0.000 -0.154 -0.085
sigma2 2.823e+04 611.104 46.201 0.000 2.7e+04 2.94e+04
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 34527.58
Prob(Q): 0.99 Prob(JB): 0.00
Heteroskedasticity (H): 33.72 Skew: 1.06
Prob(H) (two-sided): 0.00 Kurtosis: 44.67
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
ARIMA VALIDATION PERFORMANCE:
========================================
MSE: 428514.3561
RMSE: 654.6101
MAE: 604.0947
MAPE: 0.62%
R²: -5.3783
Directional Accuracy: 31.78%
SARIMA VALIDATION PERFORMANCE:
========================================
MSE: 608951.4528
RMSE: 780.3534
MAE: 731.0257
MAPE: 0.74%
R²: -8.0640
Directional Accuracy: 26.17%
SELECTED BEST MODEL: ARIMA
FINAL ARIMA VALIDATION PERFORMANCE:
========================================
MSE: 658967.5673
RMSE: 811.7682
MAE: 686.5055
MAPE: 0.38%
R²: -2.3151
Directional Accuracy: 31.48%
================================================== FUTURE FORECASTING ================================================== Future Forecast (Next 12 weeks): ---------------------------------------- Week 1 (2024-11-03): 905.05 Rs./kg Week 2 (2024-11-10): 905.05 Rs./kg Week 3 (2024-11-17): 905.05 Rs./kg Week 4 (2024-11-24): 905.05 Rs./kg Week 5 (2024-12-01): 905.05 Rs./kg Week 6 (2024-12-08): 905.05 Rs./kg Week 7 (2024-12-15): 905.05 Rs./kg Week 8 (2024-12-22): 905.05 Rs./kg Week 9 (2024-12-29): 905.05 Rs./kg Week 10 (2025-01-05): 905.05 Rs./kg Week 11 (2025-01-12): 905.05 Rs./kg Week 12 (2025-01-19): 905.05 Rs./kg
================================================== MODEL DIAGNOSTICS ================================================== ARIMA Model Diagnostics:
============================================================ FINAL SUMMARY ============================================================ Best Model: ARIMA Model Order: (3, 1, 1) Test RMSE: 811.7682 Test MAE: 686.5055 Test MAPE: 0.38% Test R²: -2.3151 Directional Accuracy: 31.48% Key Insights: - ARIMA models are simpler and faster to train - SARIMA models capture seasonal patterns better - The best model was selected based on validation performance - Future forecasts include uncertainty bands based on RMSE
In [11]:
import matplotlib.pyplot as plt
# Use test_dates, test_data, and test_forecast for SARIMA model
plt.figure(figsize=(12, 8))
plt.plot(test_dates, test_data, label='Actual Test', color='blue', linewidth=2)
plt.plot(test_dates, test_forecast, label='SARIMA Predicted', color='orange', linestyle='--', linewidth=2)
plt.title('SARIMA: Actual vs Predicted Cardamom Price (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/sarima_test_vs_predicted.png", dpi=300, bbox_inches='tight')
plt.show()
In [2]:
# Create a temporary series for decomposition
train_series = pd.Series(train_data, index=df.index[:train_size])
# Perform seasonal decomposition
# Using a period of 52 for weekly data with yearly seasonality
decomposition = seasonal_decompose(train_series, model='additive', period=26)
# Plot the decomposition
plt.figure(figsize=(12, 8))
plt.subplot(4, 1, 1)
plt.plot(decomposition.observed)
plt.title('Observed')
plt.grid(True)
plt.subplot(4, 1, 2)
plt.plot(decomposition.trend)
plt.title('Trend')
plt.grid(True)
plt.subplot(4, 1, 3)
plt.plot(decomposition.seasonal)
plt.title('Seasonality')
plt.grid(True)
plt.subplot(4, 1, 4)
plt.plot(decomposition.resid)
plt.title('Residuals')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/arima_result41.png", dpi=300, bbox_inches='tight')
plt.show()
In [60]:
# --- Step 9: Final Evaluation on Test Set ---
# Evaluate both models on test set
# First, let's get predictions from both models on the test set
# ARIMA model predictions on test set
arima_full_model = ARIMA(np.concatenate([train_data, val_data]), order=auto_arima_model.order)
arima_full_result = arima_full_model.fit()
arima_test_pred = arima_full_result.forecast(steps=len(test_data))
# SARIMA model predictions on test set
sarima_full_model = SARIMAX(
np.concatenate([train_data, val_data]),
order=auto_sarima_model.order,
seasonal_order=auto_sarima_model.seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False
)
sarima_full_result = sarima_full_model.fit(disp=False)
sarima_test_pred = sarima_full_result.forecast(steps=len(test_data))
# Evaluate both models on test set
arima_test_metrics = evaluate_model(test_data, arima_test_pred, "ARIMA TEST")
sarima_test_metrics = evaluate_model(test_data, sarima_test_pred, "SARIMA TEST")
# Select the best model based on test performance
if arima_test_metrics['RMSE'] < sarima_test_metrics['RMSE']:
print("\nSELECTED BEST MODEL: ARIMA")
best_model = arima_full_result
best_model_name = "ARIMA"
best_order = auto_arima_model.order
test_forecast = arima_test_pred
else:
print("\nSELECTED BEST MODEL: SARIMA")
best_model = sarima_full_result
best_model_name = "SARIMA"
best_order = f"{auto_sarima_model.order}{auto_sarima_model.seasonal_order}"
test_forecast = sarima_test_pred
final_metrics = evaluate_model(test_data, test_forecast, f"FINAL {best_model_name}")
# Create a comparison table of metrics
metrics_df = pd.DataFrame({
'ARIMA Validation': arima_metrics,
'SARIMA Validation': sarima_metrics,
'ARIMA Test': arima_test_metrics,
'SARIMA Test': sarima_test_metrics,
'Final Model': final_metrics
}).T
print("\n" + "="*60)
print("COMPREHENSIVE MODEL COMPARISON")
print("="*60)
print(metrics_df.round(4))
# --- Step 10: Visualization ---
# Create date indices for plotting
train_dates = df.index[:train_size]
val_dates = df.index[train_size:train_size+val_size]
test_dates = df.index[train_size+val_size:train_size+val_size+test_size]
# Create separate figures for ARIMA and SARIMA actual vs predicted
plt.figure(figsize=(15, 10))
# ARIMA Actual vs Predicted (Test Set)
plt.subplot(2, 2, 1)
plt.plot(test_dates, test_data, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, arima_test_pred, label='ARIMA Predicted', color='red', linestyle='--', linewidth=2)
plt.title('ARIMA: Actual vs Predicted (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
# ARIMA Scatter Plot (Test Set)
plt.subplot(2, 2, 2)
plt.scatter(test_data, arima_test_pred, alpha=0.6)
min_val = min(test_data.min(), arima_test_pred.min())
max_val = max(test_data.max(), arima_test_pred.max())
plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='Perfect Prediction')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('ARIMA: Actual vs Predicted Scatter Plot (Test Set)')
plt.legend()
plt.grid(True)
# SARIMA Actual vs Predicted (Test Set)
plt.subplot(2, 2, 3)
plt.plot(test_dates, test_data, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, sarima_test_pred, label='SARIMA Predicted', color='green', linestyle='--', linewidth=2)
plt.title('SARIMA: Actual vs Predicted (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
# SARIMA Scatter Plot (Test Set)
plt.subplot(2, 2, 4)
plt.scatter(test_data, sarima_test_pred, alpha=0.6)
min_val = min(test_data.min(), sarima_test_pred.min())
max_val = max(test_data.max(), sarima_test_pred.max())
plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='Perfect Prediction')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('SARIMA: Actual vs Predicted Scatter Plot (Test Set)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# Additional visualization showing the complete picture
plt.figure(figsize=(20, 15))
# Plot 1: ARIMA Results
plt.subplot(3, 2, 1)
plt.plot(train_dates, train_data, label='Train Data', color='blue', alpha=0.7)
plt.plot(val_dates, val_data, label='Actual Validation', color='green', linewidth=2)
plt.plot(val_dates, arima_val_pred, label='ARIMA Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(val_dates[0], color='gray', linestyle='--', label='Validation Start')
plt.title(f'ARIMA Model: Order {auto_arima_model.order}')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
# Plot 2: SARIMA Results
plt.subplot(3, 2, 2)
plt.plot(train_dates, train_data, label='Train Data', color='blue', alpha=0.7)
plt.plot(val_dates, val_data, label='Actual Validation', color='green', linewidth=2)
plt.plot(val_dates, sarima_val_pred, label='SARIMA Forecast', color='orange', linestyle='--', linewidth=2)
plt.axvline(val_dates[0], color='gray', linestyle='--', label='Validation Start')
plt.title(f'SARIMA Model: Order {auto_sarima_model.order}{auto_sarima_model.seasonal_order}')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
# Plot 3: Final Test Results
plt.subplot(3, 2, 3)
plt.plot(df.index[:train_size+val_size], np.concatenate([train_data, val_data]), label='Train+Validation', color='blue', alpha=0.7)
plt.plot(test_dates, test_data, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, test_forecast, label=f'{best_model_name} Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title(f'Final {best_model_name} Model: Test Performance')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
# Plot 4: Separate Actual vs Predicted (Test)
plt.subplot(3, 2, 4)
plt.plot(test_dates, test_data, label='Actual Test', color='blue', linewidth=2)
plt.plot(test_dates, test_forecast, label=f'{best_model_name} Predicted', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates,
test_forecast - final_metrics['RMSE'],
test_forecast + final_metrics['RMSE'],
alpha=0.2, color='red', label='± RMSE')
plt.title(f'Actual vs Predicted - {best_model_name} (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
# Plot 5: Model Comparison (Test Set)
plt.subplot(3, 2, 5)
models = ['ARIMA', 'SARIMA']
rmse_values = [arima_test_metrics['RMSE'], sarima_test_metrics['RMSE']]
mae_values = [arima_test_metrics['MAE'], sarima_test_metrics['MAE']]
x = np.arange(len(models))
width = 0.35
plt.bar(x - width/2, rmse_values, width, label='RMSE', alpha=0.8)
plt.bar(x + width/2, mae_values, width, label='MAE', alpha=0.8)
plt.xlabel('Models')
plt.ylabel('Error Values')
plt.title('Model Comparison (Test Set)')
plt.xticks(x, models)
plt.legend()
plt.grid(True, alpha=0.3)
# Plot 6: Residuals Analysis
residuals = test_data - test_forecast
plt.subplot(3, 2, 6)
plt.plot(test_dates, residuals, color='purple', alpha=0.7)
plt.axhline(0, color='red', linestyle='--', linewidth=2)
plt.title(f'{best_model_name} Model Residuals (Test Set)')
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
# --- Step 11: Future Forecasting ---
print("\n" + "="*50)
print("FUTURE FORECASTING")
print("="*50)
# Forecast next 12 periods
future_steps = 12
future_forecast = best_model.forecast(steps=len(test_data) + future_steps)
future_forecast = future_forecast[-future_steps:] # Get only future values
# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=future_steps, freq='W')
print(f"Future Forecast (Next {future_steps} weeks):")
print("-" * 40)
for i, (date, price) in enumerate(zip(future_dates, future_forecast), 1):
print(f"Week {i:2d} ({date.strftime('%Y-%m-%d')}): {price:.2f} Rs./kg")
# Plot future forecast
plt.figure(figsize=(12, 6))
# Plot historical data
historical_dates = df.index[-100:] # Last 100 points
historical_data = data[-100:]
plt.plot(historical_dates, historical_data, label='Historical Data', color='blue', linewidth=2)
# Plot future forecast
plt.plot(future_dates, future_forecast, label='Future Forecast', color='red', linestyle='--', linewidth=2)
plt.fill_between(future_dates,
future_forecast - final_metrics['RMSE'],
future_forecast + final_metrics['RMSE'],
alpha=0.2, color='red', label='Uncertainty Band')
plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title(f'{best_model_name} Future Price Forecast (Next {future_steps} weeks)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
# --- Step 12: Model Diagnostics ---
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)
# Plot diagnostics for the best model
if best_model_name == "ARIMA":
print("ARIMA Model Diagnostics:")
# ARIMA diagnostics
residuals = best_model.resid
plt.figure(figsize=(12, 8))
plt.subplot(2, 2, 1)
plt.plot(residuals)
plt.title('ARIMA Model Residuals')
plt.grid(True)
plt.subplot(2, 2, 2)
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.title('Residual Distribution')
plt.grid(True)
plt.subplot(2, 2, 3)
plt.acorr(residuals, maxlags=20)
plt.title('Residual Autocorrelation')
plt.grid(True)
plt.tight_layout()
plt.show()
else:
print("SARIMA Model Diagnostics:")
# Plot SARIMA diagnostics
best_model.plot_diagnostics(figsize=(12, 8))
plt.suptitle('SARIMA Model Diagnostics', y=1.02)
plt.tight_layout()
plt.show()
# --- Final Summary ---
print("\n" + "="*60)
print("FINAL SUMMARY")
print("="*60)
print(f"Best Model: {best_model_name}")
print(f"Model Order: {best_order}")
print(f"Test RMSE: {final_metrics['RMSE']:.4f}")
print(f"Test MAE: {final_metrics['MAE']:.4f}")
print(f"Test MAPE: {final_metrics['MAPE']:.2f}%")
print(f"Test R²: {final_metrics['R²']:.4f}")
print(f"Directional Accuracy: {final_metrics['DA']:.2f}%")
print("\nKey Insights:")
print("- ARIMA models are simpler and faster to train")
print("- SARIMA models capture seasonal patterns better")
print("- The best model was selected based on test performance")
print("- Future forecasts include uncertainty bands based on RMSE")
ARIMA TEST VALIDATION PERFORMANCE:
========================================
MSE: 658967.5673
RMSE: 811.7682
MAE: 686.5055
MAPE: 0.38%
R²: -2.3151
Directional Accuracy: 31.48%
SARIMA TEST VALIDATION PERFORMANCE:
========================================
MSE: 655872.2759
RMSE: 809.8594
MAE: 685.7176
MAPE: 0.38%
R²: -2.2995
Directional Accuracy: 33.33%
SELECTED BEST MODEL: SARIMA
FINAL SARIMA VALIDATION PERFORMANCE:
========================================
MSE: 655872.2759
RMSE: 809.8594
MAE: 685.7176
MAPE: 0.38%
R²: -2.2995
Directional Accuracy: 33.33%
============================================================
COMPREHENSIVE MODEL COMPARISON
============================================================
MSE RMSE MAE MAPE R² DA
ARIMA Validation 428514.3561 654.6101 604.0947 0.6185 -5.3783 31.7757
SARIMA Validation 608951.4528 780.3534 731.0257 0.7416 -8.0640 26.1682
ARIMA Test 658967.5673 811.7682 686.5055 0.3810 -2.3151 31.4815
SARIMA Test 655872.2759 809.8594 685.7176 0.3812 -2.2995 33.3333
Final Model 655872.2759 809.8594 685.7176 0.3812 -2.2995 33.3333
================================================== FUTURE FORECASTING ================================================== Future Forecast (Next 12 weeks): ---------------------------------------- Week 1 (2024-11-03): 907.44 Rs./kg Week 2 (2024-11-10): 907.44 Rs./kg Week 3 (2024-11-17): 907.44 Rs./kg Week 4 (2024-11-24): 907.44 Rs./kg Week 5 (2024-12-01): 907.44 Rs./kg Week 6 (2024-12-08): 907.44 Rs./kg Week 7 (2024-12-15): 907.44 Rs./kg Week 8 (2024-12-22): 907.44 Rs./kg Week 9 (2024-12-29): 907.44 Rs./kg Week 10 (2025-01-05): 907.44 Rs./kg Week 11 (2025-01-12): 907.44 Rs./kg Week 12 (2025-01-19): 907.44 Rs./kg
================================================== MODEL DIAGNOSTICS ================================================== SARIMA Model Diagnostics:
============================================================ FINAL SUMMARY ============================================================ Best Model: SARIMA Model Order: (3, 1, 1)(0, 0, 1, 26) Test RMSE: 809.8594 Test MAE: 685.7176 Test MAPE: 0.38% Test R²: -2.2995 Directional Accuracy: 33.33% Key Insights: - ARIMA models are simpler and faster to train - SARIMA models capture seasonal patterns better - The best model was selected based on test performance - Future forecasts include uncertainty bands based on RMSE
In [70]:
# --- Step 10: Visualization ---
# Create date indices for plotting
train_dates = df.index[:train_size]
val_dates = df.index[train_size:train_size+val_size]
test_dates = df.index[train_size+val_size:train_size+val_size+test_size]
# Final Test Results
plt.figure(figsize=(16, 6))
# ARIMA Actual vs Predicted (Test Set)
plt.plot(test_dates, test_data, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, arima_test_pred, label='ARIMA Predicted', color='red', linestyle='--', linewidth=2)
plt.title('ARIMA: Actual vs Predicted (Test Set)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/arima_result15.png", dpi=300, bbox_inches='tight')
plt.show()
Out[70]:
(array([19266., 19358., 19448., 19539., 19631., 19723., 19814., 19905.,
19997.]),
[Text(19266.0, 0, '2022-10'),
Text(19358.0, 0, '2023-01'),
Text(19448.0, 0, '2023-04'),
Text(19539.0, 0, '2023-07'),
Text(19631.0, 0, '2023-10'),
Text(19723.0, 0, '2024-01'),
Text(19814.0, 0, '2024-04'),
Text(19905.0, 0, '2024-07'),
Text(19997.0, 0, '2024-10')])
In [86]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm # Progress barimport numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm # Progress bar
1. Load and Normalize Data¶
In [89]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [90]:
np.random.seed(0)
df[' Date'] = pd.to_datetime(df[' Date'])
df.set_index(' Date', inplace=True)
df = df.sort_index()
df.head()
Out[90]:
| State Name | District Name | Market Name | Variety | Group | Arrivals (Tonnes) | Min Price (Rs./Quintal) | Max Price (Rs./Quintal) | Modal Price (Rs./Quintal) | |
|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||
| 2011-01-16 | Kerala | Idukki | Nedumkandam | Other | Spices | 14.0 | 120000 | 150000 | 130000 |
| 2011-01-23 | Kerala | Idukki | Nedumkandam | Other | Spices | 17.0 | 120000 | 150000 | 140000 |
| 2011-01-30 | Kerala | Idukki | Nedumkandam | Other | Spices | 12.0 | 120000 | 150000 | 130000 |
| 2011-02-06 | Kerala | Idukki | Nedumkandam | Other | Spices | 8.5 | 120000 | 150000 | 125000 |
| 2011-02-13 | Kerala | Idukki | Nedumkandam | Other | Spices | 9.2 | 100000 | 115000 | 107500 |
--------------- 1. Remove Outliers ---------------¶
In [92]:
def remove_outliers(df, column):
Q1 = df[column].quantile(0.25)
Q3 = df[column].quantile(0.75)
IQR = Q3 - Q1
filtered_df = df[(df[column] >= (Q1 - 1.5 * IQR)) & (df[column] <= (Q3 + 1.5 * IQR))]
return filtered_df
In [93]:
df_clean = remove_outliers(df, 'Modal Price (Rs./Quintal)')
--------------- 2. Check Stationarity ---------------¶
In [95]:
def check_stationarity(df, column):
result = adfuller(df[column])
print(f"ADF Statistic: {result[0]}")
print(f"p-value: {result[1]}")
if result[1] > 0.05:
print("Series is not stationary. Differencing will be applied.")
else:
print("Series is stationary.")
check_stationarity(df_clean, 'Modal Price (Rs./Quintal)')
ADF Statistic: -2.1259554387544832 p-value: 0.23420666194205048 Series is not stationary. Differencing will be applied.
--------------- 3. Differencing (if needed) ---------------¶
In [97]:
df_clean['Differenced'] = df_clean['Modal Price (Rs./Quintal)'].diff().dropna()
In [99]:
check_stationarity(df_clean.dropna(), 'Differenced')
ADF Statistic: -14.327533178188892 p-value: 1.1140991886231331e-26 Series is stationary.
--------------- 5. ACF and PACF Plots ---------------¶
In [103]:
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
plot_acf(df_clean['Differenced'].dropna(), ax=axes[0], lags=40)
axes[0].set_title("ACF - Differenced Series")
plot_pacf(df_clean['Differenced'].dropna(), ax=axes[1], lags=40, method='ywm')
axes[1].set_title("PACF - Differenced Series")
plt.tight_layout()
plt.show()
--------------- 6. Train-Validation-Test Split ---------------¶
In [108]:
df_clean['Modal Price (Rs./kg)'] = df_clean['Modal Price (Rs./Quintal)']/100
In [110]:
train_size = int(len(df_clean) * 0.7)
val_size = int(len(df_clean) * 0.15)
train = df_clean['Modal Price (Rs./kg)'].iloc[:train_size]
val = df_clean['Modal Price (Rs./kg)'].iloc[train_size:train_size + val_size]
test = df_clean['Modal Price (Rs./kg)'].iloc[train_size + val_size:]
----------------- 7. Time Series Cross-Validation Function (for ARIMA) -----------------¶
In [113]:
def arima_cv(train_series, val_series, p_values, d_values, q_values):
best_score, best_cfg = float("inf"), None
results = []
for p in tqdm(p_values, desc="p loop"):
for d in d_values:
for q in q_values:
order = (p, d, q)
# Skip models with p=0 and q=0 (no AR or MA terms)
if (p == 0 and q == 0):
continue
try:
model = SARIMAX(train_series,
order=order,
seasonal_order=(0,0,0,0), # No seasonality
enforce_stationarity=False,
enforce_invertibility=False)
model_fit = model.fit(disp=False)
forecast = model_fit.forecast(steps=len(val_series))
rmse = np.sqrt(mean_squared_error(val_series, forecast))
results.append((order, rmse))
if rmse < best_score:
best_score = rmse
best_cfg = order
except Exception as e:
# Optional: print(e)
continue
return best_cfg, results
--------------- 8. Find the best ARIMA parameters --------------¶
In [142]:
import pandas as pd
import numpy as np
import pmdarima as pm
from pmdarima import auto_arima
import warnings
warnings.filterwarnings('ignore')
# Load your data
df = pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy\kattapana.xlsx", parse_dates=True)
# Assuming you have a time series column (e.g., 'Modal Price')
# Make sure the data is sorted by date and set date as index if needed
# df = df.set_index('Date_column_name')
# Select your time series data
# Replace 'Modal Price (Rs./Quintal)' with your actual column name
ts_data = df['Modal Price (Rs./Quintal)'].dropna()
print("🔍 Running auto_arima to find optimal ARIMA parameters...")
# Use auto_arima to find the best model
model = auto_arima(ts_data,
seasonal=False, # Non-seasonal data
stationary=False, # Check for stationarity
start_p=0, # Minimum p value
max_p=8, # Maximum p value
start_q=0, # Minimum q value
max_q=8, # Maximum q value
d=None, # Let model determine differencing
test='adf', # Augmented Dickey-Fuller test for stationarity
trace=True, # Show progress
error_action='ignore', # Ignore invalid orders
suppress_warnings=True,
stepwise=True, # Use stepwise algorithm for faster computation
information_criterion='aic' # Use AIC for model selection
)
print(f"\n✅ Best ARIMA Model: {model.order}")
print(f"📊 AIC: {model.aic():.2f}")
print(f"📊 BIC: {model.bic():.2}")
# Fit the best model
best_model = model
# Summary of the best model
print("\n" + "="*50)
print("BEST MODEL SUMMARY:")
print("="*50)
print(best_model.summary())
# Get model parameters
best_cfg = best_model.order
print(f"\n🎯 Optimal ARIMA Order: {best_cfg}")
# Forecast using the best model (example: 10 steps ahead)
forecast_steps = 10
forecast, conf_int = best_model.predict(n_periods=forecast_steps, return_conf_int=True)
print(f"\n📈 {forecast_steps}-step Forecast:")
for i, (point, (lower, upper)) in enumerate(zip(forecast, conf_int)):
print(f"Step {i+1}: ₹{point:.2f} (95% CI: ₹{lower:.2f} - ₹{upper:.2f})")
🔍 Running auto_arima to find optimal ARIMA parameters...
Performing stepwise search to minimize aic
ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=11425.007, Time=0.05 sec
ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=11421.657, Time=0.09 sec
ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=11422.560, Time=0.08 sec
ARIMA(0,1,0)(0,0,0)[0] : AIC=11423.133, Time=0.02 sec
ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=11418.139, Time=0.12 sec
ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=11419.943, Time=0.13 sec
ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=11419.656, Time=0.28 sec
ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=11418.345, Time=0.19 sec
ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=11421.498, Time=0.43 sec
ARIMA(2,1,0)(0,0,0)[0] : AIC=11416.196, Time=0.09 sec
ARIMA(1,1,0)(0,0,0)[0] : AIC=11419.745, Time=0.05 sec
ARIMA(3,1,0)(0,0,0)[0] : AIC=11417.995, Time=0.13 sec
ARIMA(2,1,1)(0,0,0)[0] : AIC=11417.595, Time=0.23 sec
ARIMA(1,1,1)(0,0,0)[0] : AIC=11416.122, Time=0.17 sec
ARIMA(0,1,1)(0,0,0)[0] : AIC=11420.658, Time=0.06 sec
ARIMA(1,1,2)(0,0,0)[0] : AIC=11417.690, Time=0.29 sec
ARIMA(0,1,2)(0,0,0)[0] : AIC=11416.954, Time=0.11 sec
ARIMA(2,1,2)(0,0,0)[0] : AIC=11413.047, Time=0.52 sec
ARIMA(3,1,2)(0,0,0)[0] : AIC=11414.632, Time=0.80 sec
ARIMA(2,1,3)(0,0,0)[0] : AIC=11414.389, Time=0.94 sec
ARIMA(1,1,3)(0,0,0)[0] : AIC=11419.480, Time=0.47 sec
ARIMA(3,1,1)(0,0,0)[0] : AIC=11419.343, Time=0.48 sec
ARIMA(3,1,3)(0,0,0)[0] : AIC=inf, Time=1.42 sec
ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=11415.012, Time=0.59 sec
Best model: ARIMA(2,1,2)(0,0,0)[0]
Total fit time: 7.797 seconds
✅ Best ARIMA Model: (2, 1, 2)
📊 AIC: 11413.05
📊 BIC: 1.1e+04
==================================================
BEST MODEL SUMMARY:
==================================================
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 543
Model: SARIMAX(2, 1, 2) Log Likelihood -5701.524
Date: Fri, 22 Aug 2025 AIC 11413.047
Time: 11:44:13 BIC 11434.523
Sample: 0 HQIC 11421.445
- 543
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.2861 0.120 -2.385 0.017 -0.521 -0.051
ar.L2 0.5864 0.106 5.531 0.000 0.379 0.794
ma.L1 0.3700 0.128 2.895 0.004 0.120 0.620
ma.L2 -0.4451 0.113 -3.930 0.000 -0.667 -0.223
sigma2 8.203e+07 2.15e-09 3.82e+16 0.000 8.2e+07 8.2e+07
===================================================================================
Ljung-Box (L1) (Q): 0.16 Jarque-Bera (JB): 4753.21
Prob(Q): 0.69 Prob(JB): 0.00
Heteroskedasticity (H): 1.41 Skew: 0.60
Prob(H) (two-sided): 0.02 Kurtosis: 17.46
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 6.63e+31. Standard errors may be unstable.
🎯 Optimal ARIMA Order: (2, 1, 2)
📈 10-step Forecast:
Step 1: ₹130178.66 (95% CI: ₹112426.80 - ₹147930.51)
Step 2: ₹130433.51 (95% CI: ₹104254.00 - ₹156613.03)
Step 3: ₹130465.37 (95% CI: ₹96700.31 - ₹164230.43)
Step 4: ₹130605.70 (95% CI: ₹90521.80 - ₹170689.60)
Step 5: ₹130584.24 (95% CI: ₹84497.53 - ₹176670.95)
Step 6: ₹130672.67 (95% CI: ₹79351.79 - ₹181993.55)
Step 7: ₹130634.79 (95% CI: ₹74273.97 - ₹186995.60)
Step 8: ₹130697.48 (95% CI: ₹69826.13 - ₹191568.83)
Step 9: ₹130657.33 (95% CI: ₹65406.79 - ₹195907.87)
Step 10: ₹130705.58 (95% CI: ₹61460.00 - ₹199951.16)
--------------- 9. Fit Best Model ---------------¶
In [144]:
best_model = SARIMAX(train,
order=best_cfg,
seasonal_order=(0,0,0,0), # No seasonality
enforce_stationarity=False,
enforce_invertibility=False)
best_result = best_model.fit(disp=False)
In [146]:
print(best_result.summary())
SARIMAX Results
================================================================================
Dep. Variable: Modal Price (Rs./kg) No. Observations: 373
Model: SARIMAX(2, 1, 2) Log Likelihood -2185.967
Date: Fri, 22 Aug 2025 AIC 4381.933
Time: 11:44:44 BIC 4401.487
Sample: 0 HQIC 4389.701
- 373
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 1.1512 0.316 3.647 0.000 0.533 1.770
ar.L2 -0.4004 0.219 -1.832 0.067 -0.829 0.028
ma.L1 -1.3181 0.326 -4.045 0.000 -1.957 -0.679
ma.L2 0.3979 0.298 1.335 0.182 -0.186 0.982
sigma2 8153.8917 164.324 49.621 0.000 7831.822 8475.962
===================================================================================
Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 11811.78
Prob(Q): 0.82 Prob(JB): 0.00
Heteroskedasticity (H): 2.03 Skew: 2.43
Prob(H) (two-sided): 0.00 Kurtosis: 30.29
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
--------------- 10. Forecast on Test Set ---------------¶
In [149]:
forecast = best_result.forecast(steps=len(test))
9. Create indices for plotting¶
In [152]:
train_idx = df_clean.iloc[:len(train)].index
val_idx = df_clean.iloc[len(train):len(train)+len(val)].index
test_idx = df_clean.iloc[len(train)+len(val):len(train)+len(val)+len(test)].index
11. Evaluation Metrics¶
In [155]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
def safe_mape(y_true, y_pred):
mask = y_true != 0
return (np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])).mean() * 100
def directional_accuracy(y_true, y_pred):
true_direction = np.sign(np.diff(y_true))
pred_direction = np.sign(np.diff(y_pred))
return np.mean(true_direction == pred_direction) * 100
# Compute metrics
rmse = np.sqrt(mean_squared_error(test, forecast))
mae = mean_absolute_error(test, forecast)
mape = safe_mape(test.values, forecast.values)
r2 = r2_score(test, forecast)
da = directional_accuracy(test.values, forecast.values)
# Print all metrics
print(f"✅ RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}, Directional Accuracy: {da:.2f}%")
✅ RMSE: 206.9358, MAE: 183.2453, MAPE: 17.86%, R²: -0.3084, Directional Accuracy: 22.50%
In [157]:
results_df
Out[157]:
| Date | Actual | Predicted | |
|---|---|---|---|
| 0 | 2020-10-28 | 1500.0 | 1667.237235 |
| 1 | 2020-11-25 | 1500.0 | 1637.548541 |
| 2 | 2020-12-04 | 1500.0 | 1610.781425 |
| 3 | 2020-12-11 | 1500.0 | 1586.648382 |
| 4 | 2020-12-26 | 1600.0 | 1564.890201 |
| ... | ... | ... | ... |
| 90 | 2023-08-21 | 2000.0 | 1365.571500 |
| 91 | 2023-08-22 | 1900.0 | 1365.568849 |
| 92 | 2024-02-29 | 1350.0 | 1365.566458 |
| 93 | 2024-03-03 | 1300.0 | 1365.564302 |
| 94 | 2024-08-27 | 2200.0 | 1365.562359 |
95 rows × 3 columns
In [158]:
results_df = pd.DataFrame({
'Index': test.index,
'Test_Values': test.values.flatten(),
'Predicted_Values': forecast.values.flatten()
})
results_df.to_csv('fixed_predictions.tsv', index=False, sep='\t')
10. Plotting actual vs predicted values (No scaling, so use directly)¶
In [162]:
plt.figure(figsize=(15,7))
plt.plot(train_idx, train, label='Train', color='blue')
plt.plot(val_idx, val, label='Validation', color='green')
plt.plot(test_idx, test, label='Test (Actual)', color='black')
plt.plot(test_idx, forecast, label='Test (Predicted)', color='red', linestyle='--')
plt.xlabel('Date')
plt.ylabel('Price (Rs./Quintal)')
plt.title('ARIMA Model Forecast: Train, Validation, Test and Predictions')
plt.legend()
plt.grid(True)
plt.show()
In [163]:
plt.figure(figsize=(15,7))
plt.plot(test_idx, test, label='Test (Actual)', color='black')
plt.plot(test_idx, forecast, label='Test (Predicted)', color='red', linestyle='--')
Out[163]:
[<matplotlib.lines.Line2D at 0x1dce6b4a780>]
13. Diagnostic Plots¶
In [167]:
best_result.plot_diagnostics(figsize=(15, 12))
plt.show()
--- SARIMA---¶
--- Import libraries for SARIMA----¶
In [155]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm # Progress barimport numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm # Progress bar
1. Load and Normalize Data¶
In [157]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [158]:
np.random.seed(0)
df[' Date'] = pd.to_datetime(df[' Date'])
df.set_index(' Date', inplace=True)
df = df.sort_index()
df.head()
Out[158]:
| State Name | District Name | Market Name | Variety | Group | Arrivals (Tonnes) | Min Price (Rs./Quintal) | Max Price (Rs./Quintal) | Modal Price (Rs./Quintal) | |
|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||
| 2011-01-16 | Kerala | Idukki | Nedumkandam | Other | Spices | 14.0 | 120000 | 150000 | 130000 |
| 2011-01-23 | Kerala | Idukki | Nedumkandam | Other | Spices | 17.0 | 120000 | 150000 | 140000 |
| 2011-01-30 | Kerala | Idukki | Nedumkandam | Other | Spices | 12.0 | 120000 | 150000 | 130000 |
| 2011-02-06 | Kerala | Idukki | Nedumkandam | Other | Spices | 8.5 | 120000 | 150000 | 125000 |
| 2011-02-13 | Kerala | Idukki | Nedumkandam | Other | Spices | 9.2 | 100000 | 115000 | 107500 |
In [159]:
# --------------- 4. Seasonality Decomposition (Multiplicative) ---------------
# Assuming your data is weekly and yearly seasonality (period=52 weeks)
result = seasonal_decompose(df['Modal Price (Rs./Quintal)'], model='multiplicative', period=13)
fig = result.plot()
fig.set_size_inches(16, 9)
plt.suptitle("Seasonal Decomposition of Modal Price (Multiplicative Model)", fontsize=18)
plt.show()
1. Remove Outliers (e.g., using IQR method)¶
In [161]:
def remove_outliers(df, column):
Q1 = df[column].quantile(0.25)
Q3 = df[column].quantile(0.75)
IQR = Q3 - Q1
filtered_df = df[(df[column] >= (Q1 - 1.5 * IQR)) & (df[column] <= (Q3 + 1.5 * IQR))]
return filtered_df
Assuming 'Modal Price (Rs./Quintal)' is the target column¶
In [163]:
df_clean = remove_outliers(df, 'Modal Price (Rs./Quintal)')
2. Check for Stationarity (ADF Test)¶
In [167]:
def check_stationarity(df, column):
result = adfuller(df[column])
print(f"ADF Statistic: {result[0]}")
print(f"p-value: {result[1]}")
if result[1] > 0.05:
print("Series is not stationary. Differencing will be applied.")
else:
print("Series is stationary.")
check_stationarity(df_clean, 'Modal Price (Rs./Quintal)')
ADF Statistic: -2.1259554387544832 p-value: 0.23420666194205048 Series is not stationary. Differencing will be applied.
Apply differencing if the series is not stationary¶
In [170]:
df_clean['Differenced'] = df_clean['Modal Price (Rs./Quintal)'].diff().dropna()
Check again if differencing made it stationary¶
In [172]:
check_stationarity(df_clean.dropna(), 'Differenced')
ADF Statistic: -14.327533178188892 p-value: 1.1140991886231331e-26 Series is stationary.
4. ACF and PACF Plots to determine p, q, P, Q for SARIMA¶
In [174]:
plot_acf(df_clean['Differenced'].dropna())
plt.title("ACF Plot")
plt.show()
In [175]:
plot_pacf(df_clean['Differenced'].dropna())
plt.title("PACF Plot")
plt.show()
In [177]:
result = seasonal_decompose(df_clean['Modal Price (Rs./Quintal)'], model='multiplicative', period=13)
fig = result.plot()
fig.set_size_inches(16, 9)
plt.suptitle("Seasonal Decomposition of Modal Price (Multiplicative Model)", fontsize=18)
plt.show()
5. Split the data into Train, Validation, and Test¶
In [179]:
df_clean['Price (Rs./kg)'] = df_clean['Modal Price (Rs./Quintal)'] / 100
In [181]:
train_size = int(len(df_clean) * 0.7)
val_size = int(len(df_clean) * 0.15)
train = df_clean['Price (Rs./kg)'].iloc[:train_size]
val = df_clean['Price (Rs./kg)'].iloc[train_size:train_size + val_size]
test = df_clean['Price (Rs./kg)'].iloc[train_size + val_size:]
----------------- 7. Time Series Cross-Validation Function (for SARIMA) -----------------¶
In [193]:
def sarima_cv(train_series, p_values, d_values, q_values, P_values, D_values, Q_values, m):
best_score, best_cfg = float("inf"), None
results = []
for p in tqdm(p_values, desc="p loop"):
for d in d_values:
for q in q_values:
for P in P_values:
for D in D_values:
for Q in Q_values:
if (p + q + P + Q) == 0: # avoid no model
continue
order = (p, d, q)
seasonal_order = (P, D, Q, m)
try:
model = SARIMAX(train_series,
order=order,
seasonal_order=seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False)
model_fit = model.fit(disp=False)
aic = model_fit.aic # use AIC if needed
#forecast = model_fit.forecast(steps=len(val))
#rmse = np.sqrt(mean_squared_error(val, forecast))
results.append((order, seasonal_order, aic))
if aic < best_score:
best_score = aic
best_cfg = (order, seasonal_order)
except:
continue
return best_cfg, results
--------------- 8. Find the best SARIMA parameters ---------------¶
In [196]:
p_values = range(0, 5)
d_values = range(0, 1)
q_values = range(0, 5)
P_values = range(0, 3)
D_values = range(0, 1)
Q_values = range(0, 3)
m = 26 # weekly seasonality
In [198]:
# Alternative with automatic seasonal period detection
model = auto_arima(ts_data,
seasonal=True,
m=26, # Known seasonal period (26 weeks)
# m='auto', # Uncomment to let pmdarima detect seasonal period
start_p=0, max_p=6,
start_q=0, max_q=6,
start_P=0, max_P=3,
start_Q=0, max_Q=3,
max_d=2, # Maximum regular differencing
max_D=1, # Maximum seasonal differencing
test='adf',
trace=True,
error_action='warn', # Warn instead of ignore errors
suppress_warnings=False,
stepwise=True,
information_criterion='aic',
n_jobs=-1, # Use all CPU cores
n_fits=30 # Number of models to try
)
# Check if seasonal component is significant
if model.seasonal_order[1] > 0 or model.seasonal_order[3] > 0:
print("✅ Significant seasonal pattern detected")
else:
print("⚠️ No significant seasonal pattern found")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[198], line 2 1 # Alternative with automatic seasonal period detection ----> 2 model = auto_arima(ts_data, 3 seasonal=True, 4 m=26, # Known seasonal period (26 weeks) 5 # m='auto', # Uncomment to let pmdarima detect seasonal period 6 start_p=0, max_p=6, 7 start_q=0, max_q=6, 8 start_P=0, max_P=3, 9 start_Q=0, max_Q=3, 10 max_d=2, # Maximum regular differencing 11 max_D=1, # Maximum seasonal differencing 12 test='adf', 13 trace=True, 14 error_action='warn', # Warn instead of ignore errors 15 suppress_warnings=False, 16 stepwise=True, 17 information_criterion='aic', 18 n_jobs=-1, # Use all CPU cores 19 n_fits=30 # Number of models to try 20 ) 22 # Check if seasonal component is significant 23 if model.seasonal_order[1] > 0 or model.seasonal_order[3] > 0: NameError: name 'ts_data' is not defined
--------------- 9. Fit Best Model ---------------¶
In [221]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
# Assuming best_cfg is in the format: ((p,d,q), (P,D,Q,m))
best_cfg = ((2, 1, 2), (0, 0, 0, 26))
# Correct way to pass the parameters
best_model = SARIMAX(train,
order=best_cfg[0], # This should be (p,d,q) tuple
seasonal_order=best_cfg[1], # This should be (P,D,Q,m) tuple
enforce_stationarity=False,
enforce_invertibility=False)
# Fit the model (only need to fit once)
best_result = best_model.fit(disp=False)
print("✅ SARIMA model fitted successfully!")
print(f"Model order: {best_cfg[0]}")
print(f"Seasonal order: {best_cfg[1]}")
✅ SARIMA model fitted successfully! Model order: (2, 1, 2) Seasonal order: (0, 0, 0, 26)
In [225]:
print(best_result.summary())
SARIMAX Results
==============================================================================
Dep. Variable: Price (Rs./kg) No. Observations: 373
Model: SARIMAX(2, 1, 2) Log Likelihood -2185.967
Date: Fri, 22 Aug 2025 AIC 4381.933
Time: 11:47:09 BIC 4401.487
Sample: 0 HQIC 4389.701
- 373
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 1.1512 0.316 3.647 0.000 0.533 1.770
ar.L2 -0.4004 0.219 -1.832 0.067 -0.829 0.028
ma.L1 -1.3181 0.326 -4.045 0.000 -1.957 -0.679
ma.L2 0.3979 0.298 1.335 0.182 -0.186 0.982
sigma2 8153.8917 164.324 49.621 0.000 7831.822 8475.962
===================================================================================
Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 11811.78
Prob(Q): 0.82 Prob(JB): 0.00
Heteroskedasticity (H): 2.03 Skew: 2.43
Prob(H) (two-sided): 0.00 Kurtosis: 30.29
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
--------------- 10. Forecast on Test Set ---------------¶
In [230]:
forecast = best_result.forecast(steps=len(test))
9. Create indices for plotting¶
In [233]:
train_idx = df_clean.iloc[:len(train)].index
val_idx = df_clean.iloc[len(train):len(train)+len(val)].index
test_idx = df_clean.iloc[len(train)+len(val):len(train)+len(val)+len(test)].index
In [235]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
def safe_mape(y_true, y_pred):
mask = y_true != 0
return (np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])).mean() * 100
def directional_accuracy(y_true, y_pred):
true_direction = np.sign(np.diff(y_true))
pred_direction = np.sign(np.diff(y_pred))
return np.mean(true_direction == pred_direction) * 100
# Compute metrics
rmse = np.sqrt(mean_squared_error(test, forecast))
mae = mean_absolute_error(test, forecast)
mape = safe_mape(test.values, forecast.values)
r2 = r2_score(test, forecast)
da = directional_accuracy(test.values, forecast.values)
# Print all metrics
print(f"✅ RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}, Directional Accuracy: {da:.2f}%")
✅ RMSE: 206.9358, MAE: 183.2453, MAPE: 17.86%, R²: -0.3084, Directional Accuracy: 22.50%
In [125]:
# Create comparison DataFrame
results_df = pd.DataFrame({
'Date': test.index, # Assuming test has datetime index
'Test_Values': test.values.flatten(),
'Predicted_Values': forecast.values.flatten()
})
# Print first 20 rows for quick inspection
print("\nTest vs Predicted Values (first 20 rows):")
print(results_df.head(20).to_string(index=False))
# Save as TSV (tab-separated) for Excel
results_df.to_csv('test_vs_predicted.tsv', sep='\t', index=False)
print("\n✅ Results saved as 'test_vs_predicted.tsv'")
Test vs Predicted Values (first 20 rows):
Date Test_Values Predicted_Values
2021-02-05 1500.0 1185.419340
2021-02-11 1550.0 1132.433480
2021-02-17 1400.0 1149.700700
2021-02-24 1400.0 1136.296329
2021-03-02 1350.0 1149.400293
2021-03-06 1350.0 1139.591248
2021-03-16 1300.0 1139.089280
2021-03-20 1300.0 1137.811439
2021-03-27 1300.0 1148.414509
2021-04-02 1250.0 1142.703195
2021-04-17 1100.0 1147.486854
2021-04-22 1050.0 1143.880142
2021-05-04 1050.0 1151.454511
2021-05-06 900.0 1147.205248
2021-08-10 1100.0 1150.900093
2021-08-21 1050.0 1149.831182
2021-08-25 1050.0 1152.205769
2021-08-29 1100.0 1151.180134
2021-09-04 1100.0 1155.343164
2021-09-21 950.0 1154.786385
✅ Results saved as 'test_vs_predicted.tsv'
10. Plotting actual vs predicted values (No scaling, so use directly)¶
In [237]:
plt.figure(figsize=(15,7))
plt.plot(train_idx, train, label='Train', color='blue')
plt.plot(val_idx, val, label='Validation', color='green')
plt.plot(test_idx, test, label='Test (Actual)', color='black')
plt.plot(test_idx, forecast, label='Test (Predicted)', color='red', linestyle='--')
plt.xlabel('Date')
plt.ylabel('Price (Rs./Quintal)')
plt.title('SARIMA Model Forecast: Train, Validation, Test and Predictions')
plt.legend()
plt.grid(True)
plt.show()
In [238]:
plt.figure(figsize=(15,7))
plt.plot(test_idx, test, label='Test (Actual)', color='black')
plt.plot(test_idx, forecast, label='Test (Predicted)', color='red', linestyle='--')
Out[238]:
[<matplotlib.lines.Line2D at 0x1dce41915e0>]
13. Diagnostic Plots¶
In [130]:
sarima_result.plot_diagnostics(figsize=(15, 12))
plt.show()
--- LSTM---¶
--- Import libraries for LSTM----¶
In [ ]:
In [140]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt
1. Load and Normalize Data¶
In [142]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [143]:
np.random.seed(0)
df[' Date'] = pd.to_datetime(df[' Date'])
df.set_index(' Date', inplace=True)
df = df.sort_index()
df.head()
Out[143]:
| State Name | District Name | Market Name | Variety | Group | Arrivals (Tonnes) | Min Price (Rs./Quintal) | Max Price (Rs./Quintal) | Modal Price (Rs./Quintal) | |
|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||
| 2010-06-08 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 13.0 | 1500 | 1700 | 1650 |
| 2010-06-13 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 11.0 | 1300 | 1750 | 1500 |
| 2010-06-20 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 16.5 | 1400 | 1800 | 1600 |
| 2010-06-27 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 16.5 | 1300 | 1800 | 1650 |
| 2010-07-11 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 16.5 | 1400 | 1850 | 1600 |
1. Normalize data¶
In [145]:
df['Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
In [146]:
scaler = MinMaxScaler()
price_scaled = scaler.fit_transform(df[['Price (Rs./kg)']])
2. Prepare supervised learning data (X and y)¶
In [148]:
def create_sequences(data, lookback):
X, y = [], []
for i in range(len(data) - lookback):
X.append(data[i:i+lookback])
y.append(data[i+lookback])
return np.array(X), np.array(y)
lookback = 10 # You can adjust this
X, y = create_sequences(price_scaled, lookback)
3. Split into train, val, test¶
In [150]:
train_size = int(len(X) * 0.7)
val_size = int(len(X) * 0.15)
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size+val_size], y[train_size:train_size+val_size]
X_test, y_test = X[train_size+val_size:], y[train_size+val_size:]
4. Reshape for GRU [samples, timesteps, features]¶
In [152]:
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
4. Build LSTM model¶
In [154]:
def build_lstm_model(hp):
model = Sequential()
model.add(LSTM(
units=hp.Int('units', min_value=32, max_value=128, step=16),
activation='tanh',
input_shape=(X_train.shape[1], X_train.shape[2])
))
model.add(Dense(1))
model.compile(
optimizer=tf.keras.optimizers.Adam(
learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
),
loss='mse'
)
return model
5. Hypertuning with TimeSeriesSplit¶
In [156]:
tuner = kt.Hyperband(
build_lstm_model,
objective='val_loss',
max_epochs=50,
factor=3,
directory='lstm_tuner',
project_name='price_forecasting'
)
early_stop = EarlyStopping(monitor='val_loss', patience=5)
tuner.search(X_train, y_train,
epochs=50,
validation_data=(X_val, y_val),
callbacks=[early_stop],
verbose=1)
Reloading Tuner from lstm_tuner\price_forecasting\tuner0.json
6. Train final model¶
In [158]:
best_hp = tuner.get_best_hyperparameters(1)[0]
model = build_lstm_model(best_hp)
history = model.fit(
X_train, y_train,
validation_data=(X_val, y_val),
epochs=50,
callbacks=[early_stop],
verbose=1
)
model.summary()
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
Epoch 1/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 5s 48ms/step - loss: 0.0361 - val_loss: 0.0791 Epoch 2/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0054 - val_loss: 0.0836 Epoch 3/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0053 - val_loss: 0.0468 Epoch 4/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0041 - val_loss: 0.0261 Epoch 5/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0032 - val_loss: 0.0166 Epoch 6/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0040 - val_loss: 0.0152 Epoch 7/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0028 - val_loss: 0.0148 Epoch 8/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0028 - val_loss: 0.0168 Epoch 9/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0030 - val_loss: 0.0194 Epoch 10/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0024 - val_loss: 0.0139 Epoch 11/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0023 - val_loss: 0.0120 Epoch 12/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0027 - val_loss: 0.0169 Epoch 13/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0026 - val_loss: 0.0148 Epoch 14/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0030 - val_loss: 0.0133 Epoch 15/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0025 - val_loss: 0.0115 Epoch 16/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0026 - val_loss: 0.0123 Epoch 17/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0029 - val_loss: 0.0143 Epoch 18/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0039 - val_loss: 0.0116 Epoch 19/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.0024 - val_loss: 0.0108 Epoch 20/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0028 - val_loss: 0.0118 Epoch 21/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.0022 - val_loss: 0.0106 Epoch 22/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0029 - val_loss: 0.0132 Epoch 23/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0023 - val_loss: 0.0104 Epoch 24/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0025 - val_loss: 0.0123 Epoch 25/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0023 - val_loss: 0.0110 Epoch 26/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0022 - val_loss: 0.0118 Epoch 27/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0026 - val_loss: 0.0109 Epoch 28/50 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0028 - val_loss: 0.0113
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ lstm (LSTM) │ (None, 32) │ 4,352 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense (Dense) │ (None, 1) │ 33 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 13,157 (51.40 KB)
Trainable params: 4,385 (17.13 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 8,772 (34.27 KB)
7. Forecast¶
In [160]:
y_pred_scaled = model.predict(X_test)
# Inverse transform
train_inv = scaler.inverse_transform(y_train.reshape(-1, 1)).flatten()
val_inv = scaler.inverse_transform(y_val.reshape(-1, 1)).flatten()
test_inv = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_pred = scaler.inverse_transform(y_pred_scaled).flatten()
3/3 ━━━━━━━━━━━━━━━━━━━━ 1s 249ms/step
In [161]:
# 8. Create indices for plotting
train_idx = df.iloc[:len(y_train)].index
val_idx = df.iloc[len(y_train):len(y_train)+len(y_val)].index
test_idx = df.iloc[len(y_train)+len(y_val):len(y_train)+len(y_val)+len(y_test)].index
10. Evaluation¶
In [163]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
def safe_mape(y_true, y_pred):
mask = y_true != 0
return (np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])).mean() * 100
def directional_accuracy(y_true, y_pred):
true_direction = np.sign(np.diff(y_true))
pred_direction = np.sign(np.diff(y_pred))
return np.mean(true_direction == pred_direction) * 100
# Evaluation metrics
rmse = np.sqrt(mean_squared_error(test_inv, y_pred))
mae = mean_absolute_error(test_inv, y_pred)
mape = safe_mape(test_inv, y_pred)
r2 = r2_score(test_inv, y_pred)
da = directional_accuracy(test_inv, y_pred)
# Print results
print(f"✅ RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%, R²: {r2:.4f}, Directional Accuracy: {da:.2f}%")
✅ RMSE: 164.2725, MAE: 65.7785, MAPE: 4.94%, R²: 0.6425, Directional Accuracy: 25.00%
In [164]:
# Create a DataFrame with true and predicted values
results_df = pd.DataFrame({
'True_Values': test_inv,
'Predicted_Values': y_pred
})
# Save to TSV file
results_df.to_csv('true_vs_predicted.tsv', sep='\t', index=False)
print("\nSaved true vs predicted values to 'true_vs_predicted.tsv'")
Saved true vs predicted values to 'true_vs_predicted.tsv'
9. Plot¶
In [166]:
plt.figure(figsize=(15,7))
plt.plot(train_idx, train_inv, label='Train', color='blue')
plt.plot(val_idx, val_inv, label='Validation', color='green')
plt.plot(test_idx, test_inv, label='Test (Actual)', color='black')
plt.plot(test_idx, y_pred, label='Test (Predicted)', color='red', linestyle='--')
plt.xlabel('Date')
plt.ylabel('Price (Rs./Quintal)')
plt.title('LSTM Model Forecast: Train, Validation, Test and Predictions')
plt.legend()
plt.grid(True)
plt.show()
In [167]:
plt.figure(figsize=(15,7))
plt.plot(test_idx, test_inv, label='Test (Actual)', color='black')
plt.plot(test_idx, y_pred, label='Test (Predicted)', color='red', linestyle='--')
Out[167]:
[<matplotlib.lines.Line2D at 0x2389ee2a7b0>]
In [168]:
# 11. Calculate residuals
residuals = test_inv - y_pred
# 12. Plot residuals
plt.figure(figsize=(15,7))
plt.plot(test_idx, residuals, label='Residuals', color='purple')
plt.axhline(0, color='black', linestyle='--') # Zero line for reference
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.title('LSTM Model Residuals Plot')
plt.legend()
plt.grid(True)
plt.show()
In [13]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf
# Suppress warnings
warnings.filterwarnings("ignore")
# Load and preprocess data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
print(f"Original data length: {len(data)}")
# --- Step 1: Data Preparation for LSTM ---
# Use original data directly (no EMD decomposition)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.reshape(-1, 1))
# Create sequences for LSTM
def create_sequences(data, lookback=52):
X, y = [], []
for i in range(lookback, len(data)):
X.append(data[i-lookback:i])
y.append(data[i])
return np.array(X), np.array(y)
lookback = 52 # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)
# Reshape for LSTM [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")
# --- Step 2: Hyperparameter Tuning for LSTM ---
def build_model(hp):
model = Sequential()
# Number of LSTM layers
for i in range(hp.Int('num_layers', 1, 3)):
model.add(LSTM(
units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
))
model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
# Dense layers with ReLU activation
for i in range(hp.Int('dense_layers', 0, 2)):
model.add(Dense(
units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
activation='relu'
))
model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
model.add(Dense(1, activation='linear'))
model.compile(
optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
loss='mse',
metrics=['mae']
)
return model
print("\nStarting LSTM hyperparameter tuning...")
tuner = RandomSearch(
build_model,
objective='val_loss',
max_trials=15,
executions_per_trial=2,
directory='lstm_tuning',
project_name='cardamom_lstm'
)
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
tuner.search(
X_train, y_train,
epochs=100,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of LSTM layers: {best_hp.get('num_layers')}")
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
print(f"LSTM layer {i+1} units: {best_hp.get(f'units_{i}')}")
print(f"LSTM layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")
# --- Step 3: Build and Train Final LSTM Model ---
final_model = tuner.hypermodel.build(best_hp)
print("\nTraining final LSTM model...")
history = final_model.fit(
X_train, y_train,
epochs=200,
batch_size=32,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# --- Step 4: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()
# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
# Get actual values (original scale)
y_actual = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
# --- Step 5: Evaluation ---
def evaluate_forecast(actual, forecast):
"""Comprehensive forecast evaluation"""
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
mape = mean_absolute_percentage_error(actual, forecast)
r2 = r2_score(actual, forecast)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(forecast))
da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
return {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'MAPE': mape,
'R²': r2,
'Directional Accuracy': da
}
# Evaluate on original data
metrics = evaluate_forecast(y_actual, y_pred)
print("\n" + "="*60)
print("LSTM MODEL TRAINING SUMMARY")
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print("\nLSTM Model Architecture:")
final_model.summary()
print("\n" + "="*60)
print("EVALUATION METRICS")
print("="*60)
for metric, value in metrics.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
# --- Step 6: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
plt.figure(figsize=(18, 12))
# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('LSTM Training History')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Full Data with Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', color='blue', alpha=0.7)
plt.plot(test_dates, y_pred, label='LSTM Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original Data vs LSTM Forecast')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates,
y_pred - metrics['RMSE'],
y_pred + metrics['RMSE'],
alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - LSTM Model (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 7: Residual Analysis ---
residuals = y_actual - y_pred
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('LSTM Residuals Over Time')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result4.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('LSTM Residual Distribution')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result5.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result6.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result7.png", dpi=300, bbox_inches='tight')
plt.show()
print("\nLSTM Residual Analysis:")
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")
# --- Step 8: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
"""Forecast future values"""
forecasts = []
current_sequence = last_sequence.copy()
for _ in range(steps):
prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
forecasts.append(prediction)
# Update sequence
new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
current_sequence = new_sequence
# Inverse transform
forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
return future_dates, forecasts
# Forecast next 12 weeks
try:
last_sequence = scaled_data[-lookback:]
future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)
print("\n" + "="*50)
print("FUTURE FORECAST - LSTM MODEL (NEXT 12 WEEKS)")
print("="*50)
for date, price in zip(future_dates, future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
# Plot future forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
plt.plot(future_dates, future_prices, label='LSTM Future Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title('LSTM Future Price Forecast (Next 12 Weeks)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/lstm_result8.png", dpi=300, bbox_inches='tight')
plt.show()
except Exception as e:
print(f"Future forecasting failed: {e}")
# --- LSTM Benefits Summary ---
print("\n" + "="*60)
print("LSTM MODEL ADVANTAGES")
print("="*60)
print("1. Long-term Memory: Handles long-term dependencies effectively")
print("2. Sequence Learning: Excellent at learning temporal patterns")
print("3. Gate Mechanism: Input, forget, and output gates control information flow")
print("4. Vanishing Gradient Solution: Better than simple RNNs for long sequences")
print("5. Non-linear Modeling: Captures complex non-linear relationships")
print("6. Robustness: Handles noise and missing data well")
print("7. Proven Performance: Extensive successful applications in time series")
print("8. Flexibility: Can model various time series patterns and seasonalities")
print("9. Automatic Feature Learning: Learns relevant features from raw data")
print("10. Scalability: Can handle large datasets efficiently")
# --- Additional: Training vs Validation Performance Analysis ---
print("\n" + "="*60)
print("TRAINING PERFORMANCE ANALYSIS")
print("="*60)
final_train_loss = history.history['loss'][-1]
final_val_loss = history.history['val_loss'][-1]
final_train_mae = history.history['mae'][-1]
final_val_mae = history.history['val_mae'][-1]
print(f"Final Training Loss: {final_train_loss:.4f}")
print(f"Final Validation Loss: {final_val_loss:.4f}")
print(f"Final Training MAE: {final_train_mae:.4f}")
print(f"Final Validation MAE: {final_val_mae:.4f}")
# Check for overfitting
if final_val_loss > final_train_loss * 1.1:
print("Warning: Potential overfitting detected (validation loss significantly higher than training loss)")
else:
print("Good: Model shows no signs of overfitting")
Original data length: 722 Training sequences: (468, 52, 1) Validation sequences: (100, 52, 1) Test sequences: (102, 52, 1) Starting LSTM hyperparameter tuning... Reloading Tuner from lstm_tuning\cardamom_lstm\tuner0.json Best Hyperparameters: Number of LSTM layers: 2 Learning rate: 0.0010810344243983956 LSTM layer 1 units: 160 LSTM layer 1 dropout: 0.5 LSTM layer 2 units: 32 LSTM layer 2 dropout: 0.30000000000000004 Training final LSTM model... Epoch 1/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 7s 92ms/step - loss: 0.0615 - mae: 0.1784 - val_loss: 0.0061 - val_mae: 0.0665 Epoch 2/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0237 - mae: 0.0979 - val_loss: 0.0042 - val_mae: 0.0557 Epoch 3/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0151 - mae: 0.0725 - val_loss: 0.0018 - val_mae: 0.0314 Epoch 4/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0117 - mae: 0.0629 - val_loss: 0.0015 - val_mae: 0.0269 Epoch 5/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0112 - mae: 0.0615 - val_loss: 0.0023 - val_mae: 0.0344 Epoch 6/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0117 - mae: 0.0613 - val_loss: 0.0025 - val_mae: 0.0381 Epoch 7/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0077 - mae: 0.0516 - val_loss: 0.0017 - val_mae: 0.0294 Epoch 8/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0080 - mae: 0.0528 - val_loss: 0.0015 - val_mae: 0.0261 Epoch 9/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0084 - mae: 0.0515 - val_loss: 0.0014 - val_mae: 0.0244 Epoch 10/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0076 - mae: 0.0479 - val_loss: 0.0018 - val_mae: 0.0310 Epoch 11/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - loss: 0.0118 - mae: 0.0612 - val_loss: 0.0023 - val_mae: 0.0381 Epoch 12/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0081 - mae: 0.0503 - val_loss: 0.0029 - val_mae: 0.0462 Epoch 13/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0113 - mae: 0.0575 - val_loss: 0.0013 - val_mae: 0.0234 Epoch 14/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0089 - mae: 0.0499 - val_loss: 0.0018 - val_mae: 0.0327 Epoch 15/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0103 - mae: 0.0624 - val_loss: 0.0041 - val_mae: 0.0556 Epoch 16/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0094 - mae: 0.0592 - val_loss: 0.0018 - val_mae: 0.0304 Epoch 17/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0064 - mae: 0.0448 - val_loss: 0.0015 - val_mae: 0.0260 Epoch 18/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0079 - mae: 0.0499 - val_loss: 0.0014 - val_mae: 0.0248 Epoch 19/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0073 - mae: 0.0461 - val_loss: 0.0013 - val_mae: 0.0230 Epoch 20/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0086 - mae: 0.0479 - val_loss: 0.0013 - val_mae: 0.0229 Epoch 21/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0089 - mae: 0.0491 - val_loss: 0.0016 - val_mae: 0.0269 Epoch 22/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0072 - mae: 0.0461 - val_loss: 0.0013 - val_mae: 0.0225 Epoch 23/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0108 - mae: 0.0562 - val_loss: 0.0015 - val_mae: 0.0271 Epoch 24/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0060 - mae: 0.0453 - val_loss: 0.0018 - val_mae: 0.0328 Epoch 25/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0125 - mae: 0.0581 - val_loss: 0.0017 - val_mae: 0.0262 Epoch 26/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0057 - mae: 0.0462 - val_loss: 0.0015 - val_mae: 0.0273 Epoch 27/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0067 - mae: 0.0459 - val_loss: 0.0012 - val_mae: 0.0221 Epoch 28/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0086 - mae: 0.0482 - val_loss: 0.0013 - val_mae: 0.0244 Epoch 29/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0069 - mae: 0.0450 - val_loss: 0.0052 - val_mae: 0.0629 Epoch 30/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0077 - mae: 0.0517 - val_loss: 0.0018 - val_mae: 0.0307 Epoch 31/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0082 - mae: 0.0526 - val_loss: 0.0014 - val_mae: 0.0280 Epoch 32/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0086 - mae: 0.0507 - val_loss: 0.0013 - val_mae: 0.0225 Epoch 33/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0091 - mae: 0.0497 - val_loss: 0.0025 - val_mae: 0.0396 Epoch 34/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0058 - mae: 0.0423 - val_loss: 0.0014 - val_mae: 0.0248 Epoch 35/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0103 - mae: 0.0537 - val_loss: 0.0014 - val_mae: 0.0263 Epoch 36/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0080 - mae: 0.0482 - val_loss: 0.0021 - val_mae: 0.0381 Epoch 37/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0081 - mae: 0.0498 - val_loss: 0.0014 - val_mae: 0.0280 Epoch 38/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0101 - mae: 0.0525 - val_loss: 0.0011 - val_mae: 0.0219 Epoch 39/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0080 - mae: 0.0462 - val_loss: 0.0014 - val_mae: 0.0251 Epoch 40/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0081 - mae: 0.0434 - val_loss: 0.0016 - val_mae: 0.0279 Epoch 41/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0067 - mae: 0.0433 - val_loss: 0.0016 - val_mae: 0.0285 Epoch 42/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0053 - mae: 0.0413 - val_loss: 0.0012 - val_mae: 0.0248 Epoch 43/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0059 - mae: 0.0442 - val_loss: 0.0012 - val_mae: 0.0221 Epoch 44/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0104 - mae: 0.0519 - val_loss: 0.0011 - val_mae: 0.0216 Epoch 45/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0053 - mae: 0.0436 - val_loss: 0.0013 - val_mae: 0.0256 Epoch 46/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0066 - mae: 0.0432 - val_loss: 0.0021 - val_mae: 0.0392 Epoch 47/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0072 - mae: 0.0449 - val_loss: 0.0011 - val_mae: 0.0209 Epoch 48/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0070 - mae: 0.0441 - val_loss: 0.0013 - val_mae: 0.0258 Epoch 49/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0064 - mae: 0.0469 - val_loss: 0.0032 - val_mae: 0.0495 Epoch 50/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0071 - mae: 0.0485 - val_loss: 0.0011 - val_mae: 0.0209 Epoch 51/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0062 - mae: 0.0413 - val_loss: 0.0011 - val_mae: 0.0207 Epoch 52/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0057 - mae: 0.0395 - val_loss: 0.0013 - val_mae: 0.0271 Epoch 53/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0062 - mae: 0.0429 - val_loss: 0.0010 - val_mae: 0.0200 Epoch 54/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0064 - mae: 0.0424 - val_loss: 9.9059e-04 - val_mae: 0.0199 Epoch 55/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0075 - mae: 0.0460 - val_loss: 0.0012 - val_mae: 0.0232 Epoch 56/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0061 - mae: 0.0415 - val_loss: 0.0026 - val_mae: 0.0424 Epoch 57/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0051 - mae: 0.0416 - val_loss: 0.0011 - val_mae: 0.0208 Epoch 58/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0049 - mae: 0.0398 - val_loss: 0.0023 - val_mae: 0.0370 Epoch 59/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0053 - mae: 0.0413 - val_loss: 0.0017 - val_mae: 0.0305 Epoch 60/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0074 - mae: 0.0465 - val_loss: 0.0010 - val_mae: 0.0208 Epoch 61/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0065 - mae: 0.0424 - val_loss: 0.0010 - val_mae: 0.0202 Epoch 62/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0067 - mae: 0.0424 - val_loss: 8.4845e-04 - val_mae: 0.0185 Epoch 63/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0060 - mae: 0.0449 - val_loss: 0.0021 - val_mae: 0.0397 Epoch 64/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0069 - mae: 0.0465 - val_loss: 0.0045 - val_mae: 0.0634 Epoch 65/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0081 - mae: 0.0554 - val_loss: 9.3334e-04 - val_mae: 0.0195 Epoch 66/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0062 - mae: 0.0403 - val_loss: 0.0012 - val_mae: 0.0248 Epoch 67/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0058 - mae: 0.0400 - val_loss: 0.0014 - val_mae: 0.0272 Epoch 68/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0057 - mae: 0.0421 - val_loss: 0.0011 - val_mae: 0.0251 Epoch 69/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0050 - mae: 0.0390 - val_loss: 9.0703e-04 - val_mae: 0.0193 Epoch 70/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0068 - mae: 0.0419 - val_loss: 0.0014 - val_mae: 0.0260 Epoch 71/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0060 - mae: 0.0418 - val_loss: 8.2340e-04 - val_mae: 0.0187 Epoch 72/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0046 - mae: 0.0390 - val_loss: 0.0012 - val_mae: 0.0259 Epoch 73/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - loss: 0.0076 - mae: 0.0418 - val_loss: 0.0010 - val_mae: 0.0222 Epoch 74/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0062 - mae: 0.0391 - val_loss: 9.6502e-04 - val_mae: 0.0213 Epoch 75/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0060 - mae: 0.0395 - val_loss: 0.0014 - val_mae: 0.0275 Epoch 76/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0042 - mae: 0.0397 - val_loss: 0.0011 - val_mae: 0.0255 Epoch 77/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0055 - mae: 0.0408 - val_loss: 0.0013 - val_mae: 0.0292 Epoch 78/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0057 - mae: 0.0410 - val_loss: 0.0011 - val_mae: 0.0224 Epoch 79/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0053 - mae: 0.0399 - val_loss: 8.7375e-04 - val_mae: 0.0196 Epoch 80/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0060 - mae: 0.0382 - val_loss: 7.8255e-04 - val_mae: 0.0184 Epoch 81/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0056 - mae: 0.0393 - val_loss: 0.0010 - val_mae: 0.0242 Epoch 82/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0051 - mae: 0.0391 - val_loss: 9.3849e-04 - val_mae: 0.0222 Epoch 83/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0061 - mae: 0.0398 - val_loss: 0.0011 - val_mae: 0.0227 Epoch 84/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 53ms/step - loss: 0.0067 - mae: 0.0411 - val_loss: 8.2395e-04 - val_mae: 0.0200 Epoch 85/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0049 - mae: 0.0373 - val_loss: 0.0012 - val_mae: 0.0281 Epoch 86/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0075 - mae: 0.0428 - val_loss: 0.0011 - val_mae: 0.0252 Epoch 87/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0054 - mae: 0.0389 - val_loss: 0.0011 - val_mae: 0.0253 Epoch 88/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0057 - mae: 0.0391 - val_loss: 8.6970e-04 - val_mae: 0.0209 Epoch 89/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0068 - mae: 0.0433 - val_loss: 0.0011 - val_mae: 0.0227 Epoch 90/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0075 - mae: 0.0436 - val_loss: 8.6688e-04 - val_mae: 0.0194 Epoch 91/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0055 - mae: 0.0428 - val_loss: 0.0011 - val_mae: 0.0268 Epoch 92/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0038 - mae: 0.0323 - val_loss: 9.2764e-04 - val_mae: 0.0226 Epoch 93/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 0.0053 - mae: 0.0404 - val_loss: 0.0015 - val_mae: 0.0313 Epoch 94/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0037 - mae: 0.0353 - val_loss: 0.0014 - val_mae: 0.0307 Epoch 95/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0047 - mae: 0.0376 - val_loss: 0.0021 - val_mae: 0.0383 4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 160ms/step ============================================================ LSTM MODEL TRAINING SUMMARY ============================================================ Final epochs trained: 95 Best validation loss: 0.0008 Best validation MAE: 0.0184 Lookback period: 52 weeks LSTM Model Architecture:
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ lstm (LSTM) │ (None, 52, 160) │ 103,680 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout (Dropout) │ (None, 52, 160) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ lstm_1 (LSTM) │ (None, 32) │ 24,704 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_1 (Dropout) │ (None, 32) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense (Dense) │ (None, 112) │ 3,696 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_2 (Dropout) │ (None, 112) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 1) │ 113 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 396,581 (1.51 MB)
Trainable params: 132,193 (516.38 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 264,388 (1.01 MB)
============================================================ EVALUATION METRICS ============================================================ MSE: 62558.7593 RMSE: 250.1175 MAE: 157.0250 MAPE: 0.09% R²: 0.6514 Directional Accuracy: 17.82%
<Figure size 1800x1200 with 0 Axes>
LSTM Residual Analysis: Residual mean: 64.5514 Residual std: 241.6441 Residual min: -481.2880 Residual max: 1415.6563 ================================================== FUTURE FORECAST - LSTM MODEL (NEXT 12 WEEKS) ================================================== 2024-11-03: 2111.44 2024-11-10: 2051.00 2024-11-17: 1987.99 2024-11-24: 1940.50 2024-12-01: 1911.20 2024-12-08: 1897.13 2024-12-15: 1893.15 2024-12-22: 1893.74 2024-12-29: 1894.32 2025-01-05: 1891.50 2025-01-12: 1883.74 2025-01-19: 1870.96
============================================================ LSTM MODEL ADVANTAGES ============================================================ 1. Long-term Memory: Handles long-term dependencies effectively 2. Sequence Learning: Excellent at learning temporal patterns 3. Gate Mechanism: Input, forget, and output gates control information flow 4. Vanishing Gradient Solution: Better than simple RNNs for long sequences 5. Non-linear Modeling: Captures complex non-linear relationships 6. Robustness: Handles noise and missing data well 7. Proven Performance: Extensive successful applications in time series 8. Flexibility: Can model various time series patterns and seasonalities 9. Automatic Feature Learning: Learns relevant features from raw data 10. Scalability: Can handle large datasets efficiently ============================================================ TRAINING PERFORMANCE ANALYSIS ============================================================ Final Training Loss: 0.0049 Final Validation Loss: 0.0021 Final Training MAE: 0.0383 Final Validation MAE: 0.0383 Good: Model shows no signs of overfitting
In [9]:
# --- Step 9: Detailed Model Configuration Report ---
print("\n" + "="*60)
print("GRU MODEL CONFIGURATION & TRAINING DETAILS")
print("="*60)
# Optimizer details
optimizer_config = final_model.optimizer.get_config()
print(f"Optimizer: {final_model.optimizer.__class__.__name__}")
print(f"Learning Rate: {optimizer_config['learning_rate']}")
# Model architecture details
for i, layer in enumerate(final_model.layers):
print(f"\nLayer {i+1}: {layer.__class__.__name__}")
try:
print(f" Units: {layer.units}")
except:
pass
try:
print(f" Activation: {layer.activation.__name__}")
except:
pass
try:
print(f" Dropout Rate: {layer.rate}")
except:
pass
if hasattr(layer, "return_sequences"):
print(f" Return Sequences: {layer.return_sequences}")
# Training summary
print("\nTraining Details:")
print(f"Epochs Trained: {len(history.history['loss'])}")
print(f"Final Training Loss: {history.history['loss'][-1]:.4f}")
print(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
print(f"Final Training MAE: {history.history['mae'][-1]:.4f}")
print(f"Final Validation MAE: {history.history['val_mae'][-1]:.4f}")
print("\n" + "="*60)
print("NOTE: The above configuration includes optimizer, activation functions, "
"learning rate, and automatic layer details for full reproducibility.")
print("="*60)
============================================================ GRU MODEL CONFIGURATION & TRAINING DETAILS ============================================================ Optimizer: Adam Learning Rate: 0.001081034424714744 Layer 1: LSTM Units: 160 Activation: tanh Return Sequences: True Layer 2: Dropout Dropout Rate: 0.5 Layer 3: LSTM Units: 32 Activation: tanh Return Sequences: False Layer 4: Dropout Dropout Rate: 0.30000000000000004 Layer 5: Dense Units: 112 Activation: relu Layer 6: Dropout Dropout Rate: 0.30000000000000004 Layer 7: Dense Units: 1 Activation: linear Training Details: Epochs Trained: 47 Final Training Loss: 0.0063 Final Validation Loss: 0.0020 Final Training MAE: 0.0422 Final Validation MAE: 0.0325 ============================================================ NOTE: The above configuration includes optimizer, activation functions, learning rate, and automatic layer details for full reproducibility. ============================================================
In [3]:
plt.figure(figsize=(18, 12))
# Plot 3: Separate View - Actual vs Predicted
plt.plot(test_dates, y_actual, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted', color='red', linestyle='--', linewidth=2)
plt.title('Actual vs Predicted - LSTM Model (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
In [ ]:
--- GRU ---¶
--- Import libraries for GRU ----¶
In [338]:
!pip install keras-tuner
Requirement already satisfied: keras-tuner in c:\users\marti\anaconda3\lib\site-packages (1.4.7) Requirement already satisfied: keras in c:\users\marti\anaconda3\lib\site-packages (from keras-tuner) (3.5.0) Requirement already satisfied: packaging in c:\users\marti\appdata\roaming\python\python312\site-packages (from keras-tuner) (24.1) Requirement already satisfied: requests in c:\users\marti\anaconda3\lib\site-packages (from keras-tuner) (2.32.2) Requirement already satisfied: kt-legacy in c:\users\marti\anaconda3\lib\site-packages (from keras-tuner) (1.0.5) Requirement already satisfied: absl-py in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (2.1.0) Requirement already satisfied: numpy in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (1.26.4) Requirement already satisfied: rich in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (13.3.5) Requirement already satisfied: namex in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (0.0.8) Requirement already satisfied: h5py in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (3.11.0) Requirement already satisfied: optree in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (0.12.1) Requirement already satisfied: ml-dtypes in c:\users\marti\anaconda3\lib\site-packages (from keras->keras-tuner) (0.4.0) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\marti\anaconda3\lib\site-packages (from requests->keras-tuner) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in c:\users\marti\anaconda3\lib\site-packages (from requests->keras-tuner) (3.7) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\marti\anaconda3\lib\site-packages (from requests->keras-tuner) (2.2.2) Requirement already satisfied: certifi>=2017.4.17 in c:\users\marti\anaconda3\lib\site-packages (from requests->keras-tuner) (2024.7.4) Requirement already satisfied: typing-extensions>=4.5.0 in c:\users\marti\anaconda3\lib\site-packages (from optree->keras->keras-tuner) (4.11.0) Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in c:\users\marti\anaconda3\lib\site-packages (from rich->keras->keras-tuner) (2.2.0) Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\users\marti\appdata\roaming\python\python312\site-packages (from rich->keras->keras-tuner) (2.18.0) Requirement already satisfied: mdurl~=0.1 in c:\users\marti\anaconda3\lib\site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->keras->keras-tuner) (0.1.0) Could not fetch URL https://pypi.org/simple/pip/: There was a problem confirming the ssl certificate: HTTPSConnectionPool(host='pypi.org', port=443): Max retries exceeded with url: /simple/pip/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)'))) - skipping
In [15]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout # Changed LSTM to GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf
# Suppress warnings
warnings.filterwarnings("ignore")
# Load and preprocess data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
print(f"Original data length: {len(data)}")
# --- Step 1: Data Preparation for GRU ---
# Use original data directly
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.reshape(-1, 1))
# Create sequences for GRU
def create_sequences(data, lookback=52):
X, y = [], []
for i in range(lookback, len(data)):
X.append(data[i-lookback:i])
y.append(data[i])
return np.array(X), np.array(y)
lookback = 52 # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)
# Reshape for GRU [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")
# --- Step 2: Hyperparameter Tuning for GRU ---
def build_model(hp):
model = Sequential()
# Number of GRU layers (changed from LSTM to GRU)
for i in range(hp.Int('num_layers', 1, 3)):
model.add(GRU( # Changed LSTM to GRU
units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
))
model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
# Dense layers with ReLU activation
for i in range(hp.Int('dense_layers', 0, 2)):
model.add(Dense(
units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
activation='relu'
))
model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
model.add(Dense(1, activation='linear'))
model.compile(
optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
loss='mse',
metrics=['mae']
)
return model
print("\nStarting GRU hyperparameter tuning...")
tuner = RandomSearch(
build_model,
objective='val_loss',
max_trials=15,
executions_per_trial=2,
directory='gru_tuning', # Changed directory name
project_name='cardamom_gru' # Changed project name
)
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
tuner.search(
X_train, y_train,
epochs=100,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of GRU layers: {best_hp.get('num_layers')}") # Changed from LSTM to GRU
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
print(f"GRU layer {i+1} units: {best_hp.get(f'units_{i}')}") # Changed from LSTM to GRU
print(f"GRU layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}") # Changed from LSTM to GRU
# --- Step 3: Build and Train Final GRU Model ---
final_model = tuner.hypermodel.build(best_hp)
print("\nTraining final GRU model...") # Changed to GRU
history = final_model.fit(
X_train, y_train,
epochs=200,
batch_size=32,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# --- Step 4: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()
# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
# Get actual values (original scale)
y_actual = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
# --- Step 5: Evaluation ---
def evaluate_forecast(actual, forecast):
"""Comprehensive forecast evaluation"""
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
mape = mean_absolute_percentage_error(actual, forecast)
r2 = r2_score(actual, forecast)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(forecast))
da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
return {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'MAPE': mape,
'R²': r2,
'Directional Accuracy': da
}
# Evaluate on original data
metrics = evaluate_forecast(y_actual, y_pred)
print("\n" + "="*60)
print("GRU MODEL TRAINING SUMMARY") # Changed to GRU
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print("\nGRU Model Architecture:") # Changed to GRU
final_model.summary()
print("\n" + "="*60)
print("EVALUATION METRICS")
print("="*60)
for metric, value in metrics.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
# --- Step 6: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('GRU Training History') # Changed to GRU
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result8.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Full Data with Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', color='blue', alpha=0.7)
plt.plot(test_dates, y_pred, label='GRU Forecast', color='red', linestyle='--', linewidth=2) # Changed to GRU
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original Data vs GRU Forecast') # Changed to GRU
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result7.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2) # Added GRU
plt.fill_between(test_dates,
y_pred - metrics['RMSE'],
y_pred + metrics['RMSE'],
alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - GRU Model (Test Period)') # Changed to GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result6.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 7: Residual Analysis ---
residuals = y_actual - y_pred
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('GRU Residuals Over Time') # Changed to GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result5.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('GRU Residual Distribution') # Changed to GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result4.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result3.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result2.png", dpi=300, bbox_inches='tight')
plt.show()
print("\nGRU Residual Analysis:") # Changed to GRU
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")
# --- Step 8: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
"""Forecast future values"""
forecasts = []
current_sequence = last_sequence.copy()
for _ in range(steps):
prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
forecasts.append(prediction)
# Update sequence
new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
current_sequence = new_sequence
# Inverse transform
forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
return future_dates, forecasts
# Forecast next 12 weeks
try:
last_sequence = scaled_data[-lookback:]
future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)
print("\n" + "="*50)
print("FUTURE FORECAST - GRU MODEL (NEXT 12 WEEKS)") # Changed to GRU
print("="*50)
for date, price in zip(future_dates, future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
# Plot future forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
plt.plot(future_dates, future_prices, label='GRU Future Forecast', color='red', linestyle='--', linewidth=2) # Changed to GRU
plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title('GRU Future Price Forecast (Next 12 Weeks)') # Changed to GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/gru_result1.png", dpi=300, bbox_inches='tight')
plt.show()
except Exception as e:
print(f"Future forecasting failed: {e}")
# --- GRU Benefits Summary ---
print("\n" + "="*60)
print("GRU MODEL ADVANTAGES OVER LSTM")
print("="*60)
print("1. Computational Efficiency: Fewer parameters (2 gates vs LSTM's 3 gates)")
print("2. Faster Training: Less complex architecture leads to faster training times")
print("3. Better Performance: Often performs better on smaller datasets")
print("4. Reduced Overfitting: Simpler architecture can be less prone to overfitting")
print("5. Memory Efficiency: Uses less memory during training and inference")
print("6. Faster Convergence: Typically converges faster than LSTM")
print("7. Better Gradient Flow: Simpler architecture improves gradient propagation")
print("8. Simpler Architecture: Easier to train and tune")
print("9. Comparable Performance: Often achieves similar results to LSTM with less complexity")
print("10. Efficient Resource Usage: Better for resource-constrained environments")
# --- Additional: Training vs Validation Performance Analysis ---
print("\n" + "="*60)
print("TRAINING PERFORMANCE ANALYSIS")
print("="*60)
final_train_loss = history.history['loss'][-1]
final_val_loss = history.history['val_loss'][-1]
final_train_mae = history.history['mae'][-1]
final_val_mae = history.history['val_mae'][-1]
print(f"Final Training Loss: {final_train_loss:.4f}")
print(f"Final Validation Loss: {final_val_loss:.4f}")
print(f"Final Training MAE: {final_train_mae:.4f}")
print(f"Final Validation MAE: {final_val_mae:.4f}")
# Check for overfitting
if final_val_loss > final_train_loss * 1.1:
print("Warning: Potential overfitting detected (validation loss significantly higher than training loss)")
else:
print("Good: Model shows no signs of overfitting")
# --- GRU vs LSTM Comparison ---
print("\n" + "="*60)
print("GRU vs LSTM COMPARISON")
print("="*60)
print("GRU Advantages:")
print("- 30% fewer parameters than equivalent LSTM")
print("- 20-30% faster training time")
print("- Simpler architecture with 2 gates (update and reset)")
print("- Better for smaller datasets")
print("- Less prone to overfitting")
print("\nLSTM Advantages:")
print("- More expressive power with 3 gates")
print("- Better for very long sequences")
print("- More established in research literature")
print("- Slightly better on some complex tasks")
print("\nRecommendation: GRU is often preferred for its efficiency and comparable performance!")
Original data length: 722 Training sequences: (468, 52, 1) Validation sequences: (100, 52, 1) Test sequences: (102, 52, 1) Starting GRU hyperparameter tuning... Reloading Tuner from gru_tuning\cardamom_gru\tuner0.json Best Hyperparameters: Number of GRU layers: 2 Learning rate: 0.0003473714958642173 GRU layer 1 units: 256 GRU layer 1 dropout: 0.30000000000000004 GRU layer 2 units: 224 GRU layer 2 dropout: 0.1 Training final GRU model... Epoch 1/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 9s 157ms/step - loss: 0.0371 - mae: 0.1428 - val_loss: 0.0026 - val_mae: 0.0411 Epoch 2/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 108ms/step - loss: 0.0090 - mae: 0.0510 - val_loss: 0.0045 - val_mae: 0.0612 Epoch 3/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 108ms/step - loss: 0.0075 - mae: 0.0485 - val_loss: 9.5461e-04 - val_mae: 0.0200 Epoch 4/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0072 - mae: 0.0397 - val_loss: 9.8309e-04 - val_mae: 0.0212 Epoch 5/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0057 - mae: 0.0347 - val_loss: 0.0012 - val_mae: 0.0244 Epoch 6/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0062 - mae: 0.0354 - val_loss: 0.0013 - val_mae: 0.0275 Epoch 7/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0054 - mae: 0.0422 - val_loss: 8.8386e-04 - val_mae: 0.0196 Epoch 8/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0058 - mae: 0.0344 - val_loss: 8.4164e-04 - val_mae: 0.0180 Epoch 9/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0051 - mae: 0.0301 - val_loss: 8.5880e-04 - val_mae: 0.0188 Epoch 10/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0050 - mae: 0.0312 - val_loss: 8.7369e-04 - val_mae: 0.0203 Epoch 11/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0054 - mae: 0.0322 - val_loss: 9.8875e-04 - val_mae: 0.0224 Epoch 12/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0037 - mae: 0.0262 - val_loss: 7.9651e-04 - val_mae: 0.0175 Epoch 13/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 113ms/step - loss: 0.0053 - mae: 0.0309 - val_loss: 0.0015 - val_mae: 0.0308 Epoch 14/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0058 - mae: 0.0369 - val_loss: 7.7190e-04 - val_mae: 0.0186 Epoch 15/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0047 - mae: 0.0272 - val_loss: 0.0011 - val_mae: 0.0252 Epoch 16/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0049 - mae: 0.0313 - val_loss: 9.4658e-04 - val_mae: 0.0233 Epoch 17/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0044 - mae: 0.0299 - val_loss: 0.0012 - val_mae: 0.0271 Epoch 18/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0037 - mae: 0.0295 - val_loss: 0.0010 - val_mae: 0.0239 Epoch 19/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0050 - mae: 0.0347 - val_loss: 6.8192e-04 - val_mae: 0.0173 Epoch 20/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0044 - mae: 0.0294 - val_loss: 6.2613e-04 - val_mae: 0.0149 Epoch 21/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0032 - mae: 0.0278 - val_loss: 7.3258e-04 - val_mae: 0.0192 Epoch 22/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0037 - mae: 0.0283 - val_loss: 8.5199e-04 - val_mae: 0.0210 Epoch 23/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0042 - mae: 0.0280 - val_loss: 0.0011 - val_mae: 0.0268 Epoch 24/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 112ms/step - loss: 0.0035 - mae: 0.0309 - val_loss: 6.3899e-04 - val_mae: 0.0158 Epoch 25/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0031 - mae: 0.0243 - val_loss: 5.8518e-04 - val_mae: 0.0140 Epoch 26/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0047 - mae: 0.0321 - val_loss: 5.7091e-04 - val_mae: 0.0145 Epoch 27/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0039 - mae: 0.0278 - val_loss: 0.0021 - val_mae: 0.0407 Epoch 28/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0048 - mae: 0.0341 - val_loss: 7.1002e-04 - val_mae: 0.0181 Epoch 29/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0028 - mae: 0.0244 - val_loss: 5.6439e-04 - val_mae: 0.0140 Epoch 30/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0034 - mae: 0.0247 - val_loss: 6.8003e-04 - val_mae: 0.0176 Epoch 31/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0032 - mae: 0.0248 - val_loss: 5.4630e-04 - val_mae: 0.0135 Epoch 32/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0039 - mae: 0.0283 - val_loss: 7.4940e-04 - val_mae: 0.0194 Epoch 33/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 111ms/step - loss: 0.0051 - mae: 0.0310 - val_loss: 5.7265e-04 - val_mae: 0.0143 Epoch 34/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0041 - mae: 0.0278 - val_loss: 6.4447e-04 - val_mae: 0.0178 Epoch 35/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 107ms/step - loss: 0.0043 - mae: 0.0283 - val_loss: 5.3495e-04 - val_mae: 0.0136 Epoch 36/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0043 - mae: 0.0295 - val_loss: 5.6560e-04 - val_mae: 0.0156 Epoch 37/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0042 - mae: 0.0266 - val_loss: 9.6885e-04 - val_mae: 0.0253 Epoch 38/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0028 - mae: 0.0273 - val_loss: 7.1013e-04 - val_mae: 0.0197 Epoch 39/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0051 - mae: 0.0319 - val_loss: 6.3080e-04 - val_mae: 0.0165 Epoch 40/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0040 - mae: 0.0267 - val_loss: 0.0011 - val_mae: 0.0262 Epoch 41/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0039 - mae: 0.0290 - val_loss: 0.0015 - val_mae: 0.0325 Epoch 42/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0051 - mae: 0.0332 - val_loss: 0.0010 - val_mae: 0.0251 Epoch 43/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0044 - mae: 0.0285 - val_loss: 0.0013 - val_mae: 0.0304 Epoch 44/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0037 - mae: 0.0339 - val_loss: 5.3816e-04 - val_mae: 0.0138 Epoch 45/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0047 - mae: 0.0324 - val_loss: 5.4324e-04 - val_mae: 0.0139 Epoch 46/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 112ms/step - loss: 0.0036 - mae: 0.0241 - val_loss: 7.5730e-04 - val_mae: 0.0195 Epoch 47/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0027 - mae: 0.0236 - val_loss: 0.0015 - val_mae: 0.0326 Epoch 48/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0038 - mae: 0.0335 - val_loss: 7.2088e-04 - val_mae: 0.0189 Epoch 49/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0043 - mae: 0.0320 - val_loss: 5.6190e-04 - val_mae: 0.0157 Epoch 50/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0041 - mae: 0.0292 - val_loss: 5.2849e-04 - val_mae: 0.0145 Epoch 51/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0044 - mae: 0.0281 - val_loss: 5.3974e-04 - val_mae: 0.0137 Epoch 52/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0036 - mae: 0.0259 - val_loss: 8.9868e-04 - val_mae: 0.0232 Epoch 53/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0048 - mae: 0.0288 - val_loss: 8.2981e-04 - val_mae: 0.0217 Epoch 54/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0050 - mae: 0.0299 - val_loss: 5.3542e-04 - val_mae: 0.0138 Epoch 55/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0035 - mae: 0.0268 - val_loss: 7.8986e-04 - val_mae: 0.0209 Epoch 56/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 111ms/step - loss: 0.0039 - mae: 0.0271 - val_loss: 0.0011 - val_mae: 0.0276 Epoch 57/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0047 - mae: 0.0320 - val_loss: 0.0015 - val_mae: 0.0337 Epoch 58/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0044 - mae: 0.0325 - val_loss: 5.9702e-04 - val_mae: 0.0157 Epoch 59/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0032 - mae: 0.0280 - val_loss: 6.9913e-04 - val_mae: 0.0197 Epoch 60/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0029 - mae: 0.0248 - val_loss: 5.0660e-04 - val_mae: 0.0137 Epoch 61/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0026 - mae: 0.0243 - val_loss: 6.7132e-04 - val_mae: 0.0181 Epoch 62/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0028 - mae: 0.0250 - val_loss: 8.6966e-04 - val_mae: 0.0236 Epoch 63/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0032 - mae: 0.0278 - val_loss: 8.1659e-04 - val_mae: 0.0211 Epoch 64/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0032 - mae: 0.0271 - val_loss: 0.0026 - val_mae: 0.0463 Epoch 65/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 113ms/step - loss: 0.0047 - mae: 0.0377 - val_loss: 5.5698e-04 - val_mae: 0.0143 Epoch 66/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0049 - mae: 0.0259 - val_loss: 5.4018e-04 - val_mae: 0.0143 Epoch 67/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0036 - mae: 0.0253 - val_loss: 8.1784e-04 - val_mae: 0.0211 Epoch 68/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0035 - mae: 0.0255 - val_loss: 8.8462e-04 - val_mae: 0.0225 Epoch 69/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 114ms/step - loss: 0.0037 - mae: 0.0274 - val_loss: 6.7853e-04 - val_mae: 0.0181 Epoch 70/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 105ms/step - loss: 0.0034 - mae: 0.0297 - val_loss: 5.0852e-04 - val_mae: 0.0140 Epoch 71/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0030 - mae: 0.0248 - val_loss: 5.7793e-04 - val_mae: 0.0164 Epoch 72/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0041 - mae: 0.0287 - val_loss: 5.1131e-04 - val_mae: 0.0134 Epoch 73/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0039 - mae: 0.0247 - val_loss: 9.4247e-04 - val_mae: 0.0235 Epoch 74/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0036 - mae: 0.0258 - val_loss: 7.4028e-04 - val_mae: 0.0193 Epoch 75/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 106ms/step - loss: 0.0026 - mae: 0.0224 - val_loss: 6.8610e-04 - val_mae: 0.0184 4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 219ms/step ============================================================ GRU MODEL TRAINING SUMMARY ============================================================ Final epochs trained: 75 Best validation loss: 0.0005 Best validation MAE: 0.0134 Lookback period: 52 weeks GRU Model Architecture:
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ gru (GRU) │ (None, 52, 256) │ 198,912 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_3 (Dropout) │ (None, 52, 256) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ gru_1 (GRU) │ (None, 224) │ 323,904 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_4 (Dropout) │ (None, 224) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_2 (Dense) │ (None, 1) │ 225 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 1,569,125 (5.99 MB)
Trainable params: 523,041 (2.00 MB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 1,046,084 (3.99 MB)
============================================================ EVALUATION METRICS ============================================================ MSE: 46948.7033 RMSE: 216.6765 MAE: 125.2471 MAPE: 0.07% R²: 0.7384 Directional Accuracy: 18.81%
GRU Residual Analysis: Residual mean: 45.2228 Residual std: 211.9047 Residual min: -730.7085 Residual max: 1306.5748 ================================================== FUTURE FORECAST - GRU MODEL (NEXT 12 WEEKS) ================================================== 2024-11-03: 2235.58 2024-11-10: 2187.44 2024-11-17: 2146.88 2024-11-24: 2117.85 2024-12-01: 2099.04 2024-12-08: 2087.23 2024-12-15: 2079.03 2024-12-22: 2071.83 2024-12-29: 2064.06 2025-01-05: 2055.10 2025-01-12: 2044.94 2025-01-19: 2033.91
============================================================ GRU MODEL ADVANTAGES OVER LSTM ============================================================ 1. Computational Efficiency: Fewer parameters (2 gates vs LSTM's 3 gates) 2. Faster Training: Less complex architecture leads to faster training times 3. Better Performance: Often performs better on smaller datasets 4. Reduced Overfitting: Simpler architecture can be less prone to overfitting 5. Memory Efficiency: Uses less memory during training and inference 6. Faster Convergence: Typically converges faster than LSTM 7. Better Gradient Flow: Simpler architecture improves gradient propagation 8. Simpler Architecture: Easier to train and tune 9. Comparable Performance: Often achieves similar results to LSTM with less complexity 10. Efficient Resource Usage: Better for resource-constrained environments ============================================================ TRAINING PERFORMANCE ANALYSIS ============================================================ Final Training Loss: 0.0041 Final Validation Loss: 0.0007 Final Training MAE: 0.0255 Final Validation MAE: 0.0184 Good: Model shows no signs of overfitting ============================================================ GRU vs LSTM COMPARISON ============================================================ GRU Advantages: - 30% fewer parameters than equivalent LSTM - 20-30% faster training time - Simpler architecture with 2 gates (update and reset) - Better for smaller datasets - Less prone to overfitting LSTM Advantages: - More expressive power with 3 gates - Better for very long sequences - More established in research literature - Slightly better on some complex tasks Recommendation: GRU is often preferred for its efficiency and comparable performance!
In [12]:
# --- Step 9: Detailed Model Configuration Report ---
print("\n" + "="*60)
print("GRU MODEL CONFIGURATION & TRAINING DETAILS")
print("="*60)
# Optimizer details
optimizer_config = final_model.optimizer.get_config()
print(f"Optimizer: {final_model.optimizer.__class__.__name__}")
print(f"Learning Rate: {optimizer_config['learning_rate']}")
# Model architecture details
for i, layer in enumerate(final_model.layers):
print(f"\nLayer {i+1}: {layer.__class__.__name__}")
if hasattr(layer, "units"):
print(f" Units: {layer.units}")
if hasattr(layer, "activation"):
print(f" Activation: {layer.activation.__name__}")
if hasattr(layer, "rate"):
print(f" Dropout Rate: {layer.rate}")
if hasattr(layer, "return_sequences"):
print(f" Return Sequences: {layer.return_sequences}")
# Training summary
print("\nTraining Summary:")
print(f"Total Epochs Trained: {len(history.history['loss'])}")
print(f"Final Training Loss: {history.history['loss'][-1]:.4f}")
print(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
print(f"Final Training MAE: {history.history['mae'][-1]:.4f}")
print(f"Final Validation MAE: {history.history['val_mae'][-1]:.4f}")
print("\n" + "="*60)
print("NOTE: This section auto-captures optimizer, learning rate, "
"activation functions, and all layer details for reproducibility.")
print("="*60)
============================================================ GRU MODEL CONFIGURATION & TRAINING DETAILS ============================================================ Optimizer: Adam Learning Rate: 0.00034737150417640805 Layer 1: GRU Units: 256 Activation: tanh Return Sequences: True Layer 2: Dropout Dropout Rate: 0.30000000000000004 Layer 3: GRU Units: 224 Activation: tanh Return Sequences: False Layer 4: Dropout Dropout Rate: 0.1 Layer 5: Dense Units: 1 Activation: linear Training Summary: Total Epochs Trained: 62 Final Training Loss: 0.0043 Final Validation Loss: 0.0010 Final Training MAE: 0.0290 Final Validation MAE: 0.0259 ============================================================ NOTE: This section auto-captures optimizer, learning rate, activation functions, and all layer details for reproducibility. ============================================================
In [13]:
plt.figure(figsize=(18, 12))
# Plot 3: Separate View - Actual vs Predicted
plt.plot(test_dates, y_actual, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2) # Added GRU
plt.title('Actual vs Predicted - GRU Model (Test Period)') # Changed to GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
--- Fourier + ARIMA ---¶
--- Import libraries for Fourier + ARIMA and SARIMA----¶
In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from pmdarima import auto_arima
from scipy.fft import fft, fftfreq
import warnings
warnings.filterwarnings('ignore')
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
# Convert price to Rs./kg
df['Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
prices = df['Price (Rs./kg)'].dropna()
print(f"Using original data length: {len(prices)}")
# --- Fourier Feature Engineering ---
def generate_fourier_features(series, n_harmonics=5, period=52):
"""Generate Fourier features for seasonal patterns"""
n = len(series)
t = np.arange(n)
fourier_features = pd.DataFrame(index=series.index)
for k in range(1, n_harmonics + 1):
fourier_features[f'cos_{k}'] = np.cos(2 * np.pi * k * t / period)
fourier_features[f'sin_{k}'] = np.sin(2 * np.pi * k * t / period)
return fourier_features
# Generate Fourier features
fourier_features = generate_fourier_features(prices, n_harmonics=5, period=52)
# --- Step 1: Train/Test Split ---
split1 = int(0.7 * len(prices))
split2 = int(0.85 * len(prices))
train, val, test = prices.iloc[:split1], prices.iloc[split1:split2], prices.iloc[split2:]
# Split Fourier features accordingly
fourier_train = fourier_features.iloc[:split1]
fourier_val = fourier_features.iloc[split1:split2]
fourier_test = fourier_features.iloc[split2:]
# Combine train and validation for final training
full_train = pd.concat([train, val])
full_fourier = pd.concat([fourier_train, fourier_val])
# --- Option 1: Fourier ARIMA ---
print("\n" + "="*50)
print("FOURIER ARIMA MODEL")
print("="*50)
# Auto ARIMA for optimal order (non-seasonal since Fourier handles seasonality)
auto_model_arima = auto_arima(
full_train,
seasonal=False, # No seasonal component - Fourier handles seasonality
stepwise=True,
suppress_warnings=True,
error_action='ignore',
trace=True,
max_order=10,
information_criterion='aic',
test='adf'
)
print(f"Optimal ARIMA order: {auto_model_arima.order}")
# Build Fourier ARIMA model
fourier_arima_model = ARIMA(
full_train,
order=auto_model_arima.order,
exog=full_fourier
)
fourier_arima_result = fourier_arima_model.fit()
print("\nFOURIER ARIMA MODEL SUMMARY")
print("="*50)
print(fourier_arima_result.summary())
# Fourier ARIMA forecasting
fourier_arima_forecast = fourier_arima_result.get_forecast(
steps=len(test),
exog=fourier_test
)
fourier_arima_mean = fourier_arima_forecast.predicted_mean
fourier_arima_conf_int = fourier_arima_forecast.conf_int()
# --- Option 2: Fourier SARIMA ---
print("\n" + "="*50)
print("FOURIER SARIMA MODEL")
print("="*50)
# Auto SARIMA for optimal order (with reduced seasonal component since Fourier helps)
auto_model_sarima = auto_arima(
full_train,
seasonal=True,
m=26, # Reduced seasonal period since Fourier helps
stepwise=True,
suppress_warnings=True,
error_action='ignore',
trace=True,
max_order=10,
information_criterion='aic',
test='adf'
)
print(f"Optimal SARIMA order: {auto_model_sarima.order}")
print(f"Optimal Seasonal order: {auto_model_sarima.seasonal_order}")
# Build Fourier SARIMA model
fourier_sarima_model = SARIMAX(
endog=full_train,
exog=full_fourier,
order=auto_model_sarima.order,
seasonal_order=auto_model_sarima.seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False
)
fourier_sarima_result = fourier_sarima_model.fit(disp=False)
print("\nFOURIER SARIMA MODEL SUMMARY")
print("="*50)
print(fourier_sarima_result.summary())
# Fourier SARIMA forecasting
fourier_sarima_forecast = fourier_sarima_result.get_forecast(
steps=len(test),
exog=fourier_test
)
fourier_sarima_mean = fourier_sarima_forecast.predicted_mean
fourier_sarima_conf_int = fourier_sarima_forecast.conf_int()
# --- Evaluation Function ---
def evaluate_forecast(actual, forecast, model_name):
"""Comprehensive forecast evaluation"""
actual = np.asarray(actual)
forecast = np.asarray(forecast)
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
with np.errstate(divide='ignore', invalid='ignore'):
ape = np.where(actual != 0, np.abs(actual - forecast) / actual, np.nan)
mape = np.nanmean(ape) * 100
r2 = r2_score(actual, forecast)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(forecast))
da = np.mean(actual_diff == forecast_diff) * 100
metrics = {
'MSE': f"{mse:.4f}",
'RMSE': f"{rmse:.4f}",
'MAE': f"{mae:.4f}",
'MAPE': f"{mape:.2f}%" if not np.isnan(mape) else "N/A",
'R²': f"{r2:.4f}",
'Directional Accuracy': f"{da:.2f}%"
}
print(f"\n{model_name} EVALUATION METRICS")
print("="*50)
for metric, value in metrics.items():
print(f"{metric}: {value}")
return metrics
# Evaluate both models
fourier_arima_metrics = evaluate_forecast(test.values, fourier_arima_mean.values, "FOURIER ARIMA")
fourier_sarima_metrics = evaluate_forecast(test.values, fourier_sarima_mean.values, "FOURIER SARIMA")
plt.figure(figsize=(12, 6))
fourier_features.iloc[:, :4].plot(ax=plt.gca()) # Show first 4 Fourier components
plt.title('Fourier Features (First 4 Components)')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original Data
plt.figure(figsize=(12, 6))
plt.plot(prices.index, prices, label='Original Data', color='blue', linewidth=2)
plt.title('Original Cardamom Price Data')
plt.ylabel('Price (Rs./kg)')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Fourier ARIMA Results
plt.figure(figsize=(12, 6))
plt.plot(prices.index[:len(full_train)], full_train, label='Train+Val', color='blue', alpha=0.7)
plt.plot(test.index, test, label='Actual Test', color='green', linewidth=2)
plt.plot(test.index, fourier_arima_mean, label='Fourier ARIMA Forecast', color='red', linestyle='--', linewidth=2)
plt.fill_between(test.index, fourier_arima_conf_int.iloc[:, 0], fourier_arima_conf_int.iloc[:, 1],
color='pink', alpha=0.3, label='95% CI')
plt.axvline(test.index[0], color='gray', linestyle='--', label='Test Start')
plt.title(f'Fourier ARIMA: Order {auto_model_arima.order}')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 4: Fourier SARIMA Results
plt.figure(figsize=(12, 6))
plt.plot(prices.index[:len(full_train)], full_train, label='Train+Val', color='blue', alpha=0.7)
plt.plot(test.index, test, label='Actual Test', color='green', linewidth=2)
plt.plot(test.index, fourier_sarima_mean, label='Fourier SARIMA Forecast', color='orange', linestyle='--', linewidth=2)
plt.fill_between(test.index, fourier_sarima_conf_int.iloc[:, 0], fourier_sarima_conf_int.iloc[:, 1],
color='lightblue', alpha=0.3, label='95% CI')
plt.axvline(test.index[0], color='gray', linestyle='--', label='Test Start')
plt.title(f'Fourier SARIMA: Order {auto_model_sarima.order}{auto_model_sarima.seasonal_order}')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 5: Comparison
plt.figure(figsize=(12, 6))
plt.plot(test.index, test, label='Actual Test', color='black', linewidth=3)
plt.plot(test.index, fourier_arima_mean, label='Fourier ARIMA', color='red', linestyle='--', linewidth=2)
plt.plot(test.index, fourier_sarima_mean, label='Fourier SARIMA', color='orange', linestyle='--', linewidth=2)
plt.title('Model Comparison: Actual vs Forecasts')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result5.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 6: Residuals Comparison
plt.figure(figsize=(12, 6))
arima_residuals = test.values - fourier_arima_mean.values
sarima_residuals = test.values - fourier_sarima_mean.values
plt.plot(test.index, arima_residuals, label='Fourier ARIMA Residuals', color='red', alpha=0.7)
plt.plot(test.index, sarima_residuals, label='Fourier SARIMA Residuals', color='orange', alpha=0.7)
plt.axhline(0, color='black', linestyle='--')
plt.title('Residuals Comparison')
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result6.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Future Forecasting ---
def forecast_future_fourier(model_result, fourier_features, steps=12, period=52):
"""Forecast future values with Fourier features"""
# Create future dates
last_date = prices.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
# Create Fourier features for future periods
n = len(prices)
future_t = np.arange(n, n + steps)
future_fourier = pd.DataFrame(index=future_dates)
for k in range(1, 6): # 5 harmonics
future_fourier[f'cos_{k}'] = np.cos(2 * np.pi * k * future_t / period)
future_fourier[f'sin_{k}'] = np.sin(2 * np.pi * k * future_t / period)
# Forecast
if hasattr(model_result, 'get_forecast'):
future_forecast = model_result.get_forecast(steps=steps, exog=future_fourier)
return future_dates, future_forecast.predicted_mean, future_forecast.conf_int()
else:
# For ARIMA models
future_forecast = model_result.forecast(steps=steps, exog=future_fourier)
return future_dates, future_forecast, None
# Forecast next 12 weeks with both models
print("\n" + "="*50)
print("FUTURE FORECAST (NEXT 12 WEEKS)")
print("="*50)
try:
# Fourier ARIMA future forecast
arima_future_dates, arima_future_prices, arima_future_ci = forecast_future_fourier(
fourier_arima_result, fourier_features, steps=12
)
print("\nFourier ARIMA Future Forecast:")
for date, price in zip(arima_future_dates, arima_future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
except Exception as e:
print(f"Fourier ARIMA future forecasting failed: {e}")
try:
# Fourier SARIMA future forecast
sarima_future_dates, sarima_future_prices, sarima_future_ci = forecast_future_fourier(
fourier_sarima_result, fourier_features, steps=12
)
print("\nFourier SARIMA Future Forecast:")
for date, price in zip(sarima_future_dates, sarima_future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
except Exception as e:
print(f"Fourier SARIMA future forecasting failed: {e}")
# --- Model Comparison Summary ---
print("\n" + "="*50)
print("MODEL COMPARISON SUMMARY")
print("="*50)
print(f"{'Metric':<20} {'Fourier ARIMA':<15} {'Fourier SARIMA':<15}")
print("-" * 50)
for metric in ['MSE', 'RMSE', 'MAE', 'MAPE', 'R²', 'Directional Accuracy']:
print(f"{metric:<20} {fourier_arima_metrics[metric]:<15} {fourier_sarima_metrics[metric]:<15}")
# Determine best model based on RMSE
rmse_arima = float(fourier_arima_metrics['RMSE'])
rmse_sarima = float(fourier_sarima_metrics['RMSE'])
if rmse_arima < rmse_sarima:
print(f"\nBest Model: Fourier ARIMA (Lower RMSE: {rmse_arima:.4f} vs {rmse_sarima:.4f})")
best_model = fourier_arima_result
best_model_name = "Fourier ARIMA"
else:
print(f"\nBest Model: Fourier SARIMA (Lower RMSE: {rmse_sarima:.4f} vs {rmse_arima:.4f})")
best_model = fourier_sarima_result
best_model_name = "Fourier SARIMA"
print(f"\nSelected {best_model_name} as the best performing model")
Using original data length: 722
==================================================
FOURIER ARIMA MODEL
==================================================
Performing stepwise search to minimize aic
ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=7916.653, Time=0.81 sec
ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=7967.513, Time=0.09 sec
ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=7950.813, Time=0.12 sec
ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=7948.381, Time=0.15 sec
ARIMA(0,1,0)(0,0,0)[0] : AIC=7965.525, Time=0.09 sec
ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=7921.770, Time=0.71 sec
ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=7921.756, Time=0.60 sec
ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=7914.388, Time=1.08 sec
ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=7913.030, Time=0.85 sec
ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=7932.289, Time=0.17 sec
ARIMA(4,1,1)(0,0,0)[0] intercept : AIC=7914.640, Time=1.23 sec
ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=7952.034, Time=0.15 sec
ARIMA(4,1,0)(0,0,0)[0] intercept : AIC=7930.195, Time=0.21 sec
ARIMA(4,1,2)(0,0,0)[0] intercept : AIC=7914.776, Time=1.36 sec
ARIMA(3,1,1)(0,0,0)[0] : AIC=7911.076, Time=0.43 sec
ARIMA(2,1,1)(0,0,0)[0] : AIC=7919.798, Time=0.35 sec
ARIMA(3,1,0)(0,0,0)[0] : AIC=7930.310, Time=0.12 sec
ARIMA(4,1,1)(0,0,0)[0] : AIC=7912.686, Time=0.50 sec
ARIMA(3,1,2)(0,0,0)[0] : AIC=7912.433, Time=0.57 sec
ARIMA(2,1,0)(0,0,0)[0] : AIC=7950.050, Time=0.11 sec
ARIMA(2,1,2)(0,0,0)[0] : AIC=7914.694, Time=0.38 sec
ARIMA(4,1,0)(0,0,0)[0] : AIC=7928.219, Time=0.15 sec
ARIMA(4,1,2)(0,0,0)[0] : AIC=7912.821, Time=0.71 sec
Best model: ARIMA(3,1,1)(0,0,0)[0]
Total fit time: 10.950 seconds
Optimal ARIMA order: (3, 1, 1)
FOURIER ARIMA MODEL SUMMARY
==================================================
SARIMAX Results
==============================================================================
Dep. Variable: Price (Rs./kg) No. Observations: 613
Model: ARIMA(3, 1, 1) Log Likelihood -3946.556
Date: Thu, 06 Nov 2025 AIC 7923.112
Time: 10:39:45 BIC 7989.363
Sample: 01-02-2011 HQIC 7948.880
- 09-25-2022
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
cos_1 24.6779 58.974 0.418 0.676 -90.909 140.265
sin_1 30.0690 56.286 0.534 0.593 -80.249 140.387
cos_2 -21.1797 31.257 -0.678 0.498 -82.442 40.083
sin_2 13.0870 38.290 0.342 0.733 -61.960 88.134
cos_3 8.2423 33.333 0.247 0.805 -57.089 73.573
sin_3 21.0912 28.204 0.748 0.455 -34.188 76.370
cos_4 -15.4877 26.412 -0.586 0.558 -67.254 36.279
sin_4 -17.5556 25.418 -0.691 0.490 -67.374 32.263
cos_5 12.3867 30.087 0.412 0.681 -46.583 71.356
sin_5 8.5902 25.788 0.333 0.739 -41.953 59.133
ar.L1 0.5269 0.047 11.213 0.000 0.435 0.619
ar.L2 0.0601 0.041 1.449 0.147 -0.021 0.141
ar.L3 -0.1457 0.038 -3.813 0.000 -0.221 -0.071
ma.L1 -0.7782 0.043 -18.048 0.000 -0.863 -0.694
sigma2 2.336e+04 637.069 36.673 0.000 2.21e+04 2.46e+04
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 67889.56
Prob(Q): 0.94 Prob(JB): 0.00
Heteroskedasticity (H): 17.58 Skew: 0.74
Prob(H) (two-sided): 0.00 Kurtosis: 54.58
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
==================================================
FOURIER SARIMA MODEL
==================================================
Performing stepwise search to minimize aic
ARIMA(2,1,2)(1,0,1)[26] intercept : AIC=7913.710, Time=8.81 sec
ARIMA(0,1,0)(0,0,0)[26] intercept : AIC=7967.513, Time=0.09 sec
ARIMA(1,1,0)(1,0,0)[26] intercept : AIC=7944.651, Time=1.74 sec
ARIMA(0,1,1)(0,0,1)[26] intercept : AIC=7942.765, Time=1.64 sec
ARIMA(0,1,0)(0,0,0)[26] : AIC=7965.525, Time=0.09 sec
ARIMA(2,1,2)(0,0,1)[26] intercept : AIC=7911.713, Time=6.45 sec
ARIMA(2,1,2)(0,0,0)[26] intercept : AIC=7916.653, Time=0.73 sec
ARIMA(2,1,2)(0,0,2)[26] intercept : AIC=7913.709, Time=15.85 sec
ARIMA(2,1,2)(1,0,0)[26] intercept : AIC=7911.772, Time=5.94 sec
ARIMA(2,1,2)(1,0,2)[26] intercept : AIC=inf, Time=19.04 sec
ARIMA(1,1,2)(0,0,1)[26] intercept : AIC=7916.837, Time=4.65 sec
ARIMA(2,1,1)(0,0,1)[26] intercept : AIC=7916.837, Time=4.82 sec
ARIMA(3,1,2)(0,0,1)[26] intercept : AIC=7908.360, Time=8.56 sec
ARIMA(3,1,2)(0,0,0)[26] intercept : AIC=7914.388, Time=1.07 sec
ARIMA(3,1,2)(1,0,1)[26] intercept : AIC=7910.416, Time=9.58 sec
ARIMA(3,1,2)(0,0,2)[26] intercept : AIC=7910.351, Time=19.03 sec
ARIMA(3,1,2)(1,0,0)[26] intercept : AIC=7908.427, Time=7.13 sec
ARIMA(3,1,2)(1,0,2)[26] intercept : AIC=7912.248, Time=21.41 sec
ARIMA(3,1,1)(0,0,1)[26] intercept : AIC=7907.233, Time=6.17 sec
ARIMA(3,1,1)(0,0,0)[26] intercept : AIC=7913.030, Time=0.91 sec
ARIMA(3,1,1)(1,0,1)[26] intercept : AIC=7909.227, Time=8.63 sec
ARIMA(3,1,1)(0,0,2)[26] intercept : AIC=7909.224, Time=17.02 sec
ARIMA(3,1,1)(1,0,0)[26] intercept : AIC=7907.293, Time=7.49 sec
ARIMA(3,1,1)(1,0,2)[26] intercept : AIC=inf, Time=21.27 sec
ARIMA(3,1,0)(0,0,1)[26] intercept : AIC=7925.684, Time=2.53 sec
ARIMA(4,1,1)(0,0,1)[26] intercept : AIC=7908.743, Time=7.75 sec
ARIMA(2,1,0)(0,0,1)[26] intercept : AIC=7946.061, Time=1.80 sec
ARIMA(4,1,0)(0,0,1)[26] intercept : AIC=7924.287, Time=2.69 sec
ARIMA(4,1,2)(0,0,1)[26] intercept : AIC=7908.950, Time=9.15 sec
ARIMA(3,1,1)(0,0,1)[26] : AIC=7905.266, Time=2.35 sec
ARIMA(3,1,1)(0,0,0)[26] : AIC=7911.076, Time=0.43 sec
ARIMA(3,1,1)(1,0,1)[26] : AIC=7907.259, Time=3.59 sec
ARIMA(3,1,1)(0,0,2)[26] : AIC=7907.257, Time=6.77 sec
ARIMA(3,1,1)(1,0,0)[26] : AIC=7905.329, Time=2.10 sec
ARIMA(3,1,1)(1,0,2)[26] : AIC=7909.257, Time=7.21 sec
ARIMA(2,1,1)(0,0,1)[26] : AIC=7914.867, Time=2.15 sec
ARIMA(3,1,0)(0,0,1)[26] : AIC=7923.701, Time=1.20 sec
ARIMA(4,1,1)(0,0,1)[26] : AIC=7906.776, Time=2.97 sec
ARIMA(3,1,2)(0,0,1)[26] : AIC=7906.392, Time=2.55 sec
ARIMA(2,1,0)(0,0,1)[26] : AIC=7944.073, Time=0.92 sec
ARIMA(2,1,2)(0,0,1)[26] : AIC=7909.742, Time=2.39 sec
ARIMA(4,1,0)(0,0,1)[26] : AIC=7922.306, Time=1.48 sec
ARIMA(4,1,2)(0,0,1)[26] : AIC=7906.848, Time=4.45 sec
Best model: ARIMA(3,1,1)(0,0,1)[26]
Total fit time: 262.643 seconds
Optimal SARIMA order: (3, 1, 1)
Optimal Seasonal order: (0, 0, 1, 26)
FOURIER SARIMA MODEL SUMMARY
==================================================
SARIMAX Results
==========================================================================================
Dep. Variable: Price (Rs./kg) No. Observations: 613
Model: SARIMAX(3, 1, 1)x(0, 0, 1, 26) Log Likelihood -3769.721
Date: Thu, 06 Nov 2025 AIC 7571.443
Time: 10:44:19 BIC 7641.361
Sample: 01-02-2011 HQIC 7598.693
- 09-25-2022
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
cos_1 24.4663 68.118 0.359 0.719 -109.042 157.975
sin_1 30.0717 63.225 0.476 0.634 -93.847 153.990
cos_2 -21.2353 28.584 -0.743 0.458 -77.258 34.787
sin_2 13.0552 36.760 0.355 0.722 -58.994 85.104
cos_3 8.0218 39.527 0.203 0.839 -69.451 85.494
sin_3 21.1340 33.143 0.638 0.524 -43.826 86.094
cos_4 -15.7023 26.415 -0.594 0.552 -67.475 36.071
sin_4 -17.9938 28.764 -0.626 0.532 -74.371 38.384
cos_5 12.5697 35.232 0.357 0.721 -56.483 81.623
sin_5 8.5754 31.656 0.271 0.786 -53.470 70.621
ar.L1 0.5375 0.047 11.446 0.000 0.445 0.630
ar.L2 0.0585 0.042 1.385 0.166 -0.024 0.141
ar.L3 -0.1566 0.040 -3.947 0.000 -0.234 -0.079
ma.L1 -0.7799 0.042 -18.553 0.000 -0.862 -0.697
ma.S.L26 -0.1166 0.025 -4.586 0.000 -0.166 -0.067
sigma2 2.361e+04 745.500 31.664 0.000 2.21e+04 2.51e+04
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 55910.34
Prob(Q): 0.93 Prob(JB): 0.00
Heteroskedasticity (H): 29.99 Skew: 1.18
Prob(H) (two-sided): 0.00 Kurtosis: 50.88
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
FOURIER ARIMA EVALUATION METRICS
==================================================
MSE: 648480.1852
RMSE: 805.2827
MAE: 681.7101
MAPE: 37.97%
R²: -2.2624
Directional Accuracy: 23.15%
FOURIER SARIMA EVALUATION METRICS
==================================================
MSE: 650370.4008
RMSE: 806.4555
MAE: 682.7201
MAPE: 38.03%
R²: -2.2719
Directional Accuracy: 24.07%
================================================== FUTURE FORECAST (NEXT 12 WEEKS) ================================================== Fourier ARIMA Future Forecast: 2024-11-03: 900.68 2024-11-10: 891.36 2024-11-17: 887.67 2024-11-24: 891.69 2024-12-01: 900.62 2024-12-08: 912.35 2024-12-15: 924.36 2024-12-22: 935.11 2024-12-29: 943.95 2025-01-05: 951.27 2025-01-12: 958.08 2025-01-19: 965.35 Fourier SARIMA Future Forecast: 2024-11-03: 902.34 2024-11-10: 889.00 2024-11-17: 884.43 2024-11-24: 895.70 2024-12-01: 906.19 2024-12-08: 916.03 2024-12-15: 936.92 2024-12-22: 946.38 2024-12-29: 947.46 2025-01-05: 957.19 2025-01-12: 969.24 2025-01-19: 966.35 ================================================== MODEL COMPARISON SUMMARY ================================================== Metric Fourier ARIMA Fourier SARIMA -------------------------------------------------- MSE 648480.1852 650370.4008 RMSE 805.2827 806.4555 MAE 681.7101 682.7201 MAPE 37.97% 38.03% R² -2.2624 -2.2719 Directional Accuracy 23.15% 24.07% Best Model: Fourier ARIMA (Lower RMSE: 805.2827 vs 806.4555) Selected Fourier ARIMA as the best performing model
In [19]:
# Plot 5: Comparison
plt.figure(figsize=(12, 6))
plt.plot(test.index, test, label='Actual Test', color='black', linewidth=3)
plt.plot(test.index, fourier_sarima_mean, label='Fourier SARIMA', color='orange', linestyle='--', linewidth=2)
plt.title('Model Comparison: Actual vs Forecasts')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result15.png", dpi=300, bbox_inches='tight')
plt.show()
In [20]:
# Plot 5: Comparison
plt.figure(figsize=(12, 6))
plt.plot(test.index, test, label='Actual Test', color='black', linewidth=3)
plt.plot(test.index, fourier_arima_mean, label='Fourier ARIMA', color='red', linestyle='--', linewidth=2)
plt.title('Model Comparison: Actual vs Forecasts')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/farima_result51.png", dpi=300, bbox_inches='tight')
plt.show()
--- Fourier + LSTM ---¶
--- Import libraries for Fourier +LSTM ----¶
Final model training on full training data (70%) and validation (15%) split¶
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from scipy.fft import fft, fftfreq
import warnings
warnings.filterwarnings('ignore')
# TensorFlow imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
# Convert price to Rs./kg
df['Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
prices = df['Price (Rs./kg)'].dropna()
print(f"Using original data length: {len(prices)}")
# --- Step 1: Fourier Feature Engineering ---
def add_fourier_features(prices_series, n_harmonics=5):
"""Add Fourier features based on dominant frequencies"""
# Convert to numpy array for FFT
prices_values = prices_series.values
n = len(prices_values)
t = np.arange(n)
# Perform FFT to find dominant frequencies
fft_vals = fft(prices_values)
fft_freqs = fftfreq(n)
# Get dominant frequencies (excluding DC component and negative frequencies)
positive_freq_mask = fft_freqs > 0
positive_freqs = fft_freqs[positive_freq_mask]
positive_fft_vals = np.abs(fft_vals[positive_freq_mask])
# Get indices of dominant frequencies
dominant_indices = np.argsort(positive_fft_vals)[::-1][:n_harmonics]
dom_freqs = positive_freqs[dominant_indices]
fourier_df = pd.DataFrame(index=prices_series.index)
for i, freq in enumerate(dom_freqs):
if abs(freq) < 1e-10: # Skip near-zero frequencies
continue
fourier_df[f'sin_{i}'] = np.sin(2 * np.pi * freq * t)
fourier_df[f'cos_{i}'] = np.cos(2 * np.pi * freq * t)
return fourier_df
# Generate Fourier features
fourier_features = add_fourier_features(prices, n_harmonics=5) # Changed from prices_clean to prices
# Combine original prices with Fourier features
data_with_fourier = pd.concat([prices, fourier_features], axis=1) # Changed from prices_clean to prices
# --- Step 2: Data Scaling ---
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_with_fourier)
# Separate the scaled data back into features and target
scaled_prices = scaled_data[:, 0] # First column is the target
scaled_fourier = scaled_data[:, 1:] # Remaining columns are Fourier features
# --- Step 3: Create Sequences for LSTM ---
def create_sequences(data, target, lookback=52):
"""Create sequences for LSTM training"""
X, y = [], []
for i in range(lookback, len(data)):
X.append(data[i-lookback:i]) # Lookback window
y.append(target[i]) # Target value
return np.array(X), np.array(y)
lookback = 52 # 52 weeks (1 year) lookback period
X, y = create_sequences(scaled_data, scaled_prices, lookback)
# --- Step 4: Train/Test Split ---
split1 = int(0.7 * len(X))
split2 = int(0.85 * len(X))
X_train, X_val, X_test = X[:split1], X[split1:split2], X[split2:]
y_train, y_val, y_test = y[:split1], y[split1:split2], y[split2:]
print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")
# --- Step 5: Hyperparameter Tuning ---
def build_model(hp):
model = Sequential()
# Number of LSTM layers
for i in range(hp.Int('num_layers', 1, 3)):
model.add(LSTM(
units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
))
model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
# Dense layers
for i in range(hp.Int('dense_layers', 0, 2)):
model.add(Dense(
units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
activation='relu'
))
model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
model.add(Dense(1, activation='linear'))
model.compile(
optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
loss='mse',
metrics=['mae']
)
return model
# Hyperparameter tuning
print("\n" + "="*50)
print("HYPERPARAMETER TUNING")
print("="*50)
tuner = RandomSearch(
build_model,
objective='val_loss',
max_trials=10,
executions_per_trial=2,
directory='lstm_tuning',
project_name='cardamom_forecast'
)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
tuner.search(
X_train, y_train,
epochs=100,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of LSTM layers: {best_hp.get('num_layers')}")
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
print(f"LSTM layer {i+1} units: {best_hp.get(f'units_{i}')}")
print(f"LSTM layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")
# --- Step 6: Build and Train Final Model ---
print("\n" + "="*50)
print("TRAINING FINAL MODEL")
print("="*50)
# Build final model with best hyperparameters
final_model = tuner.hypermodel.build(best_hp)
# Train the model
history = final_model.fit(
X_train, y_train,
epochs=200,
batch_size=32,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# --- Step 7: Forecasting ---
# Predict on test set
y_pred = final_model.predict(X_test).flatten()
# Inverse transform predictions
# Create dummy array for inverse transformation
dummy_array = np.zeros((len(y_pred), scaled_data.shape[1]))
dummy_array[:, 0] = y_pred
dummy_array[:, 1:] = scaled_fourier[lookback+split2:lookback+split2+len(y_pred)] # Corresponding Fourier features
# Inverse transform
inverse_transformed = scaler.inverse_transform(dummy_array)
forecast_mean = inverse_transformed[:, 0]
# Get actual values (inverse transformed)
actual_dummy = np.zeros((len(y_test), scaled_data.shape[1]))
actual_dummy[:, 0] = y_test
actual_dummy[:, 1:] = scaled_fourier[lookback+split2:lookback+split2+len(y_test)]
actual_inverse = scaler.inverse_transform(actual_dummy)
actual_values = actual_inverse[:, 0]
# --- Step 8: Evaluation ---
def evaluate_forecast(actual, forecast):
"""Comprehensive forecast evaluation"""
actual = np.asarray(actual)
forecast = np.asarray(forecast)
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
with np.errstate(divide='ignore', invalid='ignore'):
ape = np.where(actual != 0, np.abs(actual - forecast) / actual, np.nan)
mape = np.nanmean(ape) * 100
r2 = r2_score(actual, forecast)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(forecast))
da = np.mean(actual_diff == forecast_diff) * 100
return {
'MSE': f"{mse:.4f}",
'RMSE': f"{rmse:.4f}",
'MAE': f"{mae:.4f}",
'MAPE': f"{mape:.2f}%" if not np.isnan(mape) else "N/A",
'R²': f"{r2:.4f}",
'Directional Accuracy': f"{da:.2f}%"
}
metrics = evaluate_forecast(actual_values, forecast_mean)
print("\n" + "="*50)
print("MODEL SUMMARY")
print("="*50)
print(f"Lookback period: {lookback} weeks")
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print("\nArchitecture:")
final_model.summary()
print("\n" + "="*50)
print("FORECAST EVALUATION METRICS")
print("="*50)
for metric, value in metrics.items():
print(f"{metric}: {value}")
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Training History')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original Data
plt.figure(figsize=(12, 6))
plt.plot(prices.index, prices, label='Original Data', color='blue', linewidth=2)
plt.title('Original Cardamom Price Data')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Forecast Results
test_dates = prices.index[lookback+split2:lookback+split2+len(actual_values)] # Changed from prices_clean to prices
plt.figure(figsize=(12, 6))
plt.plot(prices.index[:lookback+split2], prices[:lookback+split2], label='Train+Val', color='blue', alpha=0.7) # Changed from prices_clean to prices
plt.plot(test_dates, actual_values, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, forecast_mean, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Cardamom Price Forecasting with Fourier LSTM')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 10: Future Forecasting ---
def forecast_future(model, last_sequence, fourier_features, scaler, steps=12):
"""Forecast future values"""
forecasts = []
current_sequence = last_sequence.copy()
for _ in range(steps):
# Predict next value
prediction = model.predict(current_sequence.reshape(1, lookback, -1), verbose=0)[0, 0]
# Update sequence
new_row = np.zeros_like(current_sequence[0])
new_row[0] = prediction # Price prediction
new_row[1:] = current_sequence[-1, 1:] # Keep Fourier features same as last observation
current_sequence = np.vstack([current_sequence[1:], new_row])
forecasts.append(prediction)
# Inverse transform
dummy_array = np.zeros((len(forecasts), scaled_data.shape[1]))
dummy_array[:, 0] = forecasts
dummy_array[:, 1:] = fourier_features[-len(forecasts):] if len(fourier_features) >= len(forecasts) else fourier_features[-1]
inverse_forecasts = scaler.inverse_transform(dummy_array)[:, 0]
# Create future dates
last_date = prices.index[-1] # Changed from prices_clean to prices
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
return future_dates, inverse_forecasts
# Forecast next 12 weeks
try:
last_sequence = scaled_data[-lookback:] # Last lookback window
future_dates, future_prices = forecast_future(final_model, last_sequence, scaled_fourier, scaler, steps=12)
print("\n" + "="*50)
print("FUTURE FORECAST (NEXT 12 WEEKS)")
print("="*50)
for date, price in zip(future_dates, future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
except Exception as e:
print(f"Future forecasting failed: {e}")
# --- Additional: Residual Analysis ---
print("\n" + "="*50)
print("RESIDUAL ANALYSIS")
print("="*50)
residuals = actual_values - forecast_mean
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
# Plot residuals
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('Model Residuals')
plt.grid(True)
plt.tight_layout()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result4.png", dpi=300, bbox_inches='tight')
plt.show()
Using original data length: 722 Training sequences: (468, 52, 11) Validation sequences: (101, 52, 11) Test sequences: (101, 52, 11) ================================================== HYPERPARAMETER TUNING ================================================== Reloading Tuner from lstm_tuning\cardamom_forecast\tuner0.json Best Hyperparameters: Number of LSTM layers: 3 Learning rate: 0.0007999550401225116 LSTM layer 1 units: 192 LSTM layer 1 dropout: 0.1 LSTM layer 2 units: 64 LSTM layer 2 dropout: 0.4 LSTM layer 3 units: 32 LSTM layer 3 dropout: 0.1 ================================================== TRAINING FINAL MODEL ================================================== Epoch 1/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 18s 241ms/step - loss: 0.1027 - mae: 0.2346 - val_loss: 0.0042 - val_mae: 0.0481 Epoch 2/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 119ms/step - loss: 0.0175 - mae: 0.0947 - val_loss: 0.0106 - val_mae: 0.0900 Epoch 3/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 118ms/step - loss: 0.0124 - mae: 0.0746 - val_loss: 0.0148 - val_mae: 0.1152 Epoch 4/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 117ms/step - loss: 0.0132 - mae: 0.0734 - val_loss: 0.0150 - val_mae: 0.1172 Epoch 5/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 115ms/step - loss: 0.0156 - mae: 0.0762 - val_loss: 0.0218 - val_mae: 0.1397 Epoch 6/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 117ms/step - loss: 0.0102 - mae: 0.0652 - val_loss: 0.0175 - val_mae: 0.1270 Epoch 7/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 113ms/step - loss: 0.0098 - mae: 0.0616 - val_loss: 0.0206 - val_mae: 0.1338 Epoch 8/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 115ms/step - loss: 0.0100 - mae: 0.0629 - val_loss: 0.0167 - val_mae: 0.1201 Epoch 9/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 118ms/step - loss: 0.0092 - mae: 0.0611 - val_loss: 0.0247 - val_mae: 0.1473 Epoch 10/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 119ms/step - loss: 0.0113 - mae: 0.0640 - val_loss: 0.0145 - val_mae: 0.1136 Epoch 11/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 2s 114ms/step - loss: 0.0097 - mae: 0.0628 - val_loss: 0.0124 - val_mae: 0.1038 4/4 ━━━━━━━━━━━━━━━━━━━━ 2s 349ms/step ================================================== MODEL SUMMARY ================================================== Lookback period: 52 weeks Final epochs trained: 11 Best validation loss: 0.0042 Best validation MAE: 0.0481 Architecture:
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ lstm (LSTM) │ (None, 52, 192) │ 156,672 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout (Dropout) │ (None, 52, 192) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ lstm_1 (LSTM) │ (None, 52, 64) │ 65,792 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_1 (Dropout) │ (None, 52, 64) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ lstm_2 (LSTM) │ (None, 32) │ 12,416 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_2 (Dropout) │ (None, 32) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense (Dense) │ (None, 112) │ 3,696 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_3 (Dropout) │ (None, 112) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 32) │ 3,616 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_4 (Dropout) │ (None, 32) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_2 (Dense) │ (None, 1) │ 33 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 726,677 (2.77 MB)
Trainable params: 242,225 (946.19 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 484,452 (1.85 MB)
================================================== FORECAST EVALUATION METRICS ================================================== MSE: 669119.3890 RMSE: 817.9972 MAE: 753.7879 MAPE: 45.13% R²: -2.8036 Directional Accuracy: 21.00%
================================================== FUTURE FORECAST (NEXT 12 WEEKS) ================================================== 2024-11-03: 1408.34 2024-11-10: 1409.47 2024-11-17: 1409.35 2024-11-24: 1407.97 2024-12-01: 1405.36 2024-12-08: 1401.72 2024-12-15: 1397.19 2024-12-22: 1391.41 2024-12-29: 1385.15 2025-01-05: 1378.57 2025-01-12: 1371.84 2025-01-19: 1365.09 ================================================== RESIDUAL ANALYSIS ================================================== Residual mean: 753.7879 Residual std: 317.6841
In [2]:
import matplotlib.pyplot as plt
import numpy as np
# Reconstruct denoised signal from first n_harmonics frequencies
# (This assumes you're using the same FFT approach as earlier)
def denoise_signal_fft(prices, n_harmonics=5):
n = len(prices)
fft_vals = fft(prices)
fft_freqs = fftfreq(n)
# Zero-out all but the largest frequency components
denoised_fft = np.zeros_like(fft_vals)
positive_freq_mask = fft_freqs > 0
positive_fft_vals = np.abs(fft_vals[positive_freq_mask])
dominant_indices = np.argsort(positive_fft_vals)[::-1][:n_harmonics]
# Add back DC component
denoised_fft[0] = fft_vals[0]
# Add back n_harmonics dominant frequencies
denoised_indices = np.where(positive_freq_mask)[0][dominant_indices]
for idx in denoised_indices:
denoised_fft[idx] = fft_vals[idx]
denoised_fft[-idx] = fft_vals[-idx] # Mirror negative frequencies
# Inverse FFT to get denoised signal
denoised_signal = np.real(np.fft.ifft(denoised_fft))
return denoised_signal
denoised_prices = denoise_signal_fft(prices.values, n_harmonics=5)
# Plot original and denoised series
plt.figure(figsize=(14, 6))
plt.plot(prices.index, prices, label='Original Time Series', color='blue', linewidth=2)
plt.plot(prices.index, denoised_prices, label='Denoised (Fourier, Top 5 Harmonics)', color='red', linewidth=2)
plt.title('Original vs Denoised (Trend) Cardamom Price Time Series')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fourier_denoised_timeseries3.png", dpi=300, bbox_inches='tight')
plt.show()
In [19]:
# --- Step 11: Detailed Model Configuration Report ---
print("\n" + "="*60)
print("LSTM MODEL CONFIGURATION & TRAINING DETAILS")
print("="*60)
# Optimizer details
optimizer_config = final_model.optimizer.get_config()
print(f"Optimizer: {final_model.optimizer.__class__.__name__}")
print(f"Learning Rate: {optimizer_config['learning_rate']}")
# Model architecture details
for i, layer in enumerate(final_model.layers):
print(f"\nLayer {i+1}: {layer.__class__.__name__}")
if hasattr(layer, "units"):
print(f" Units: {layer.units}")
if hasattr(layer, "activation"):
print(f" Activation: {layer.activation.__name__}")
if hasattr(layer, "rate"):
print(f" Dropout Rate: {layer.rate}")
if hasattr(layer, "return_sequences"):
print(f" Return Sequences: {layer.return_sequences}")
# Training summary
print("\nTraining Summary:")
print(f"Total Epochs Trained: {len(history.history['loss'])}")
print(f"Final Training Loss: {history.history['loss'][-1]:.4f}")
print(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
print(f"Final Training MAE: {history.history['mae'][-1]:.4f}")
print(f"Final Validation MAE: {history.history['val_mae'][-1]:.4f}")
print("\n" + "="*60)
print("NOTE: This report captures optimizer, learning rate, "
"activation functions, and all layer details for reproducibility.")
print("="*60)
============================================================ LSTM MODEL CONFIGURATION & TRAINING DETAILS ============================================================ Optimizer: Adam Learning Rate: 0.0007999550434760749 Layer 1: LSTM Units: 192 Activation: tanh Return Sequences: True Layer 2: Dropout Dropout Rate: 0.1 Layer 3: LSTM Units: 64 Activation: tanh Return Sequences: True Layer 4: Dropout Dropout Rate: 0.4 Layer 5: LSTM Units: 32 Activation: tanh Return Sequences: False Layer 6: Dropout Dropout Rate: 0.1 Layer 7: Dense Units: 112 Activation: relu Layer 8: Dropout Dropout Rate: 0.1 Layer 9: Dense Units: 32 Activation: relu Layer 10: Dropout Dropout Rate: 0.2 Layer 11: Dense Units: 1 Activation: linear Training Summary: Total Epochs Trained: 11 Final Training Loss: 0.0087 Final Validation Loss: 0.0183 Final Training MAE: 0.0536 Final Validation MAE: 0.1286 ============================================================ NOTE: This report captures optimizer, learning rate, activation functions, and all layer details for reproducibility. ============================================================
In [27]:
# --- Step 9: Visualization ---
plt.figure(figsize=(12, 6))
# Plot 3: Forecast Results
test_dates = prices.index[lookback+split2:lookback+split2+len(actual_values)] # Changed from prices_clean to prices
plt.plot(test_dates, actual_values, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, forecast_mean, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Cardamom Price Forecasting with Fourier LSTM')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flst_result66.png", dpi=300, bbox_inches='tight')
plt.show()
Fourier + GRU¶
Import Libraries for Fourier + GRU¶
In [70]:
In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from scipy.fft import fft, fftfreq
import warnings
warnings.filterwarnings('ignore')
# TensorFlow imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout # Changed LSTM to GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
# Load and preprocess data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
# Convert price to Rs./kg
df['Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
prices = df['Price (Rs./kg)'].dropna()
print(f"Using original data length: {len(prices)}")
# --- Step 1: Fourier Feature Engineering ---
def add_fourier_features(prices_series, n_harmonics=5):
"""Add Fourier features based on dominant frequencies"""
# Convert to numpy array for FFT
prices_values = prices_series.values
n = len(prices_values)
t = np.arange(n)
# Perform FFT to find dominant frequencies
fft_vals = fft(prices_values)
fft_freqs = fftfreq(n)
# Get dominant frequencies (excluding DC component and negative frequencies)
positive_freq_mask = fft_freqs > 0
positive_freqs = fft_freqs[positive_freq_mask]
positive_fft_vals = np.abs(fft_vals[positive_freq_mask])
# Get indices of dominant frequencies
dominant_indices = np.argsort(positive_fft_vals)[::-1][:n_harmonics]
dom_freqs = positive_freqs[dominant_indices]
fourier_df = pd.DataFrame(index=prices_series.index)
for i, freq in enumerate(dom_freqs):
if abs(freq) < 1e-10: # Skip near-zero frequencies
continue
fourier_df[f'sin_{i}'] = np.sin(2 * np.pi * freq * t)
fourier_df[f'cos_{i}'] = np.cos(2 * np.pi * freq * t)
return fourier_df
# Generate Fourier features
fourier_features = add_fourier_features(prices, n_harmonics=5)
# Combine original prices with Fourier features
data_with_fourier = pd.concat([prices, fourier_features], axis=1)
# --- Step 2: Data Scaling ---
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_with_fourier)
# Separate the scaled data back into features and target
scaled_prices = scaled_data[:, 0] # First column is the target
scaled_fourier = scaled_data[:, 1:] # Remaining columns are Fourier features
# --- Step 3: Create Sequences for GRU ---
def create_sequences(data, target, lookback=52):
"""Create sequences for GRU training"""
X, y = [], []
for i in range(lookback, len(data)):
X.append(data[i-lookback:i]) # Lookback window
y.append(target[i]) # Target value
return np.array(X), np.array(y)
lookback = 52 # 52 weeks (1 year) lookback period
X, y = create_sequences(scaled_data, scaled_prices, lookback)
# --- Step 4: Train/Test Split ---
split1 = int(0.7 * len(X))
split2 = int(0.85 * len(X))
X_train, X_val, X_test = X[:split1], X[split1:split2], X[split2:]
y_train, y_val, y_test = y[:split1], y[split1:split2], y[split2:]
print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")
# --- Step 5: Hyperparameter Tuning for GRU ---
def build_model(hp):
model = Sequential()
# Number of GRU layers (changed from LSTM to GRU)
for i in range(hp.Int('num_layers', 1, 3)):
model.add(GRU( # Changed LSTM to GRU
units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
))
model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
# Dense layers
for i in range(hp.Int('dense_layers', 0, 2)):
model.add(Dense(
units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
activation='relu'
))
model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
model.add(Dense(1, activation='linear'))
model.compile(
optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
loss='mse',
metrics=['mae']
)
return model
# Hyperparameter tuning
print("\n" + "="*50)
print("HYPERPARAMETER TUNING FOR GRU")
print("="*50)
tuner = RandomSearch(
build_model,
objective='val_loss',
max_trials=10,
executions_per_trial=2,
directory='gru_tuning', # Changed from lstm_tuning to gru_tuning
project_name='cardamom_forecast_gru' # Added _gru suffix
)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
tuner.search(
X_train, y_train,
epochs=100,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of GRU layers: {best_hp.get('num_layers')}") # Changed from LSTM to GRU
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
print(f"GRU layer {i+1} units: {best_hp.get(f'units_{i}')}") # Changed from LSTM to GRU
print(f"GRU layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}") # Changed from LSTM to GRU
# --- Step 6: Build and Train Final GRU Model ---
print("\n" + "="*50)
print("TRAINING FINAL GRU MODEL") # Changed from LSTM to GRU
print("="*50)
# Build final model with best hyperparameters
final_model = tuner.hypermodel.build(best_hp)
# Train the model
history = final_model.fit(
X_train, y_train,
epochs=200,
batch_size=32,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# --- Step 7: Forecasting ---
# Predict on test set
y_pred = final_model.predict(X_test).flatten()
# Inverse transform predictions
# Create dummy array for inverse transformation
dummy_array = np.zeros((len(y_pred), scaled_data.shape[1]))
dummy_array[:, 0] = y_pred
dummy_array[:, 1:] = scaled_fourier[lookback+split2:lookback+split2+len(y_pred)] # Corresponding Fourier features
# Inverse transform
inverse_transformed = scaler.inverse_transform(dummy_array)
forecast_mean = inverse_transformed[:, 0]
# Get actual values (inverse transformed)
actual_dummy = np.zeros((len(y_test), scaled_data.shape[1]))
actual_dummy[:, 0] = y_test
actual_dummy[:, 1:] = scaled_fourier[lookback+split2:lookback+split2+len(y_test)]
actual_inverse = scaler.inverse_transform(actual_dummy)
actual_values = actual_inverse[:, 0]
# --- Step 8: Evaluation ---
def evaluate_forecast(actual, forecast):
"""Comprehensive forecast evaluation"""
actual = np.asarray(actual)
forecast = np.asarray(forecast)
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
with np.errstate(divide='ignore', invalid='ignore'):
ape = np.where(actual != 0, np.abs(actual - forecast) / actual, np.nan)
mape = np.nanmean(ape) * 100
r2 = r2_score(actual, forecast)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(forecast))
da = np.mean(actual_diff == forecast_diff) * 100
return {
'MSE': f"{mse:.4f}",
'RMSE': f"{rmse:.4f}",
'MAE': f"{mae:.4f}",
'MAPE': f"{mape:.2f}%" if not np.isnan(mape) else "N/A",
'R²': f"{r2:.4f}",
'Directional Accuracy': f"{da:.2f}%"
}
metrics = evaluate_forecast(actual_values, forecast_mean)
print("\n" + "="*50)
print("MODEL SUMMARY - FOURIER GRU") # Changed to GRU
print("="*50)
print(f"Lookback period: {lookback} weeks")
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print("\nArchitecture:")
final_model.summary()
print("\n" + "="*50)
print("FORECAST EVALUATION METRICS")
print("="*50)
for metric, value in metrics.items():
print(f"{metric}: {value}")
# --- Step 9: Visualization ---
plt.figure(figsize=(16, 12))
# Plot 1: Training history
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('GRU Model Training History') # Changed to GRU
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fgr_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original Data
plt.figure(figsize=(12, 6))
plt.plot(prices.index, prices, label='Original Data', color='blue', linewidth=2)
plt.title('Original Cardamom Price Data')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fgr_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Forecast Results
test_dates = prices.index[lookback+split2:lookback+split2+len(actual_values)]
plt.figure(figsize=(12, 6))
plt.plot(prices.index[:lookback+split2], prices[:lookback+split2], label='Train+Val', color='blue', alpha=0.7)
plt.plot(test_dates, actual_values, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, forecast_mean, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Cardamom Price Forecasting with Fourier GRU') # Changed to GRU
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fgr_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 10: Future Forecasting ---
def forecast_future(model, last_sequence, fourier_features, scaler, steps=12):
"""Forecast future values"""
forecasts = []
current_sequence = last_sequence.copy()
for _ in range(steps):
# Predict next value
prediction = model.predict(current_sequence.reshape(1, lookback, -1), verbose=0)[0, 0]
# Update sequence
new_row = np.zeros_like(current_sequence[0])
new_row[0] = prediction # Price prediction
new_row[1:] = current_sequence[-1, 1:] # Keep Fourier features same as last observation
current_sequence = np.vstack([current_sequence[1:], new_row])
forecasts.append(prediction)
# Inverse transform
dummy_array = np.zeros((len(forecasts), scaled_data.shape[1]))
dummy_array[:, 0] = forecasts
dummy_array[:, 1:] = fourier_features[-len(forecasts):] if len(fourier_features) >= len(forecasts) else fourier_features[-1]
inverse_forecasts = scaler.inverse_transform(dummy_array)[:, 0]
# Create future dates
last_date = prices.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
return future_dates, inverse_forecasts
# Forecast next 12 weeks
try:
last_sequence = scaled_data[-lookback:] # Last lookback window
future_dates, future_prices = forecast_future(final_model, last_sequence, scaled_fourier, scaler, steps=12)
print("\n" + "="*50)
print("FUTURE FORECAST (NEXT 12 WEEKS)")
print("="*50)
for date, price in zip(future_dates, future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
except Exception as e:
print(f"Future forecasting failed: {e}")
# --- Additional: Residual Analysis ---
print("\n" + "="*50)
print("RESIDUAL ANALYSIS")
print("="*50)
residuals = actual_values - forecast_mean
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
# Plot residuals
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('GRU Model Residuals') # Changed to GRU
plt.grid(True)
plt.tight_layout()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/fgr_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Additional: Compare GRU performance benefits ---
print("\n" + "="*50)
print("GRU vs LSTM COMPARISON")
print("="*50)
print("GRU Advantages:")
print("- Fewer parameters (faster training)")
print("- Simpler architecture (2 gates vs LSTM's 3 gates)")
print("- Better performance on smaller datasets")
print("- Less prone to overfitting")
print("- More efficient memory usage")
Using original data length: 722 Training sequences: (468, 52, 11) Validation sequences: (101, 52, 11) Test sequences: (101, 52, 11) ================================================== HYPERPARAMETER TUNING FOR GRU ================================================== Reloading Tuner from gru_tuning\cardamom_forecast_gru\tuner0.json Best Hyperparameters: Number of GRU layers: 1 Learning rate: 0.00349041012762722 GRU layer 1 units: 160 GRU layer 1 dropout: 0.5 ================================================== TRAINING FINAL GRU MODEL ================================================== Epoch 1/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 6s 113ms/step - loss: 0.1296 - mae: 0.2756 - val_loss: 0.0297 - val_mae: 0.1638 Epoch 2/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0246 - mae: 0.1162 - val_loss: 0.0252 - val_mae: 0.1507 Epoch 3/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0167 - mae: 0.0902 - val_loss: 0.0327 - val_mae: 0.1769 Epoch 4/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0161 - mae: 0.0875 - val_loss: 0.0232 - val_mae: 0.1484 Epoch 5/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0131 - mae: 0.0767 - val_loss: 0.0224 - val_mae: 0.1445 Epoch 6/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0127 - mae: 0.0795 - val_loss: 0.0251 - val_mae: 0.1528 Epoch 7/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0108 - mae: 0.0709 - val_loss: 0.0256 - val_mae: 0.1564 Epoch 8/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0109 - mae: 0.0670 - val_loss: 0.0134 - val_mae: 0.1117 Epoch 9/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0149 - mae: 0.0730 - val_loss: 0.0131 - val_mae: 0.1102 Epoch 10/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0143 - mae: 0.0775 - val_loss: 0.0148 - val_mae: 0.1156 Epoch 11/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0101 - mae: 0.0624 - val_loss: 0.0132 - val_mae: 0.1097 Epoch 12/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0083 - mae: 0.0575 - val_loss: 0.0142 - val_mae: 0.1151 Epoch 13/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0125 - mae: 0.0630 - val_loss: 0.0129 - val_mae: 0.1085 Epoch 14/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0075 - mae: 0.0547 - val_loss: 0.0202 - val_mae: 0.1372 Epoch 15/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0091 - mae: 0.0584 - val_loss: 0.0121 - val_mae: 0.1057 Epoch 16/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0087 - mae: 0.0556 - val_loss: 0.0202 - val_mae: 0.1365 Epoch 17/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0101 - mae: 0.0536 - val_loss: 0.0154 - val_mae: 0.1197 Epoch 18/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 40ms/step - loss: 0.0076 - mae: 0.0503 - val_loss: 0.0162 - val_mae: 0.1222 Epoch 19/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0075 - mae: 0.0524 - val_loss: 0.0125 - val_mae: 0.1080 Epoch 20/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0081 - mae: 0.0529 - val_loss: 0.0101 - val_mae: 0.0962 Epoch 21/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0105 - mae: 0.0601 - val_loss: 0.0128 - val_mae: 0.1082 Epoch 22/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0069 - mae: 0.0503 - val_loss: 0.0129 - val_mae: 0.1073 Epoch 23/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0059 - mae: 0.0479 - val_loss: 0.0068 - val_mae: 0.0786 Epoch 24/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0085 - mae: 0.0555 - val_loss: 0.0090 - val_mae: 0.0903 Epoch 25/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0070 - mae: 0.0502 - val_loss: 0.0094 - val_mae: 0.0926 Epoch 26/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0071 - mae: 0.0522 - val_loss: 0.0183 - val_mae: 0.1245 Epoch 27/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0072 - mae: 0.0509 - val_loss: 0.0148 - val_mae: 0.1150 Epoch 28/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0063 - mae: 0.0491 - val_loss: 0.0099 - val_mae: 0.0953 Epoch 29/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0056 - mae: 0.0472 - val_loss: 0.0135 - val_mae: 0.1104 Epoch 30/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0078 - mae: 0.0528 - val_loss: 0.0173 - val_mae: 0.1262 Epoch 31/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0069 - mae: 0.0513 - val_loss: 0.0175 - val_mae: 0.1276 Epoch 32/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0077 - mae: 0.0532 - val_loss: 0.0090 - val_mae: 0.0906 Epoch 33/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0069 - mae: 0.0512 - val_loss: 0.0094 - val_mae: 0.0926 4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 111ms/step ================================================== MODEL SUMMARY - FOURIER GRU ================================================== Lookback period: 52 weeks Final epochs trained: 33 Best validation loss: 0.0068 Best validation MAE: 0.0786 Architecture:
Model: "sequential_3"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ gru_2 (GRU) │ (None, 160) │ 83,040 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_10 (Dropout) │ (None, 160) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_6 (Dense) │ (None, 112) │ 18,032 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_11 (Dropout) │ (None, 112) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_7 (Dense) │ (None, 1) │ 113 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 303,557 (1.16 MB)
Trainable params: 101,185 (395.25 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 202,372 (790.52 KB)
================================================== FORECAST EVALUATION METRICS ================================================== MSE: 520635.0803 RMSE: 721.5505 MAE: 661.0315 MAPE: 39.48% R²: -1.9596 Directional Accuracy: 20.00%
<Figure size 1600x1200 with 0 Axes>
================================================== FUTURE FORECAST (NEXT 12 WEEKS) ================================================== 2024-11-03: 1181.33 2024-11-10: 917.99 2024-11-17: 827.83 2024-11-24: 800.37 2024-12-01: 791.48 2024-12-08: 789.89 2024-12-15: 790.28 2024-12-22: 790.75 2024-12-29: 790.94 2025-01-05: 790.95 2025-01-12: 790.90 2025-01-19: 790.86 ================================================== RESIDUAL ANALYSIS ================================================== Residual mean: 661.0315 Residual std: 289.2620
================================================== GRU vs LSTM COMPARISON ================================================== GRU Advantages: - Fewer parameters (faster training) - Simpler architecture (2 gates vs LSTM's 3 gates) - Better performance on smaller datasets - Less prone to overfitting - More efficient memory usage
In [23]:
# --- Step 11: Detailed Model Configuration Report & JSON Export ---
import json
import inspect
from tensorflow.keras import backend as K
print("\n" + "="*60)
print("FOURIER + GRU MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)
report = {}
# Optimizer details (robust handling for TensorFlow objects)
try:
opt = final_model.optimizer
opt_name = opt.__class__.__name__
# learning_rate may be a schedule/variable; get numeric if possible
try:
lr_val = float(K.get_value(opt.learning_rate))
except Exception:
# fallback to config if present
opt_cfg = opt.get_config()
lr_val = opt_cfg.get('learning_rate', None)
try:
lr_val = float(lr_val)
except Exception:
pass
report['optimizer'] = opt_name
report['learning_rate'] = lr_val
print(f"Optimizer: {opt_name}")
print(f"Learning Rate: {lr_val}")
except Exception as e:
report['optimizer'] = str(e)
print(f"Could not fetch optimizer details: {e}")
# Hyperparameter info (if tuner/best_hp available)
try:
report['best_hyperparameters'] = best_hp.values
print("\nBest Hyperparameters (from tuner):")
for k, v in best_hp.values.items():
print(f" {k}: {v}")
except Exception:
# best_hp may not be available in some contexts
try:
# If tuner exists and has get_best_hyperparameters
best = tuner.get_best_hyperparameters(num_trials=1)[0]
report['best_hyperparameters'] = best.values
print("\nBest Hyperparameters (from tuner):")
for k, v in best.values.items():
print(f" {k}: {v}")
except Exception:
report['best_hyperparameters'] = None
# Model architecture details
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
layer_info = {}
layer_info['index'] = i + 1
layer_info['class_name'] = layer.__class__.__name__
layer_info['name'] = layer.name
# Units (for recurrent / dense)
if hasattr(layer, 'units'):
try:
layer_info['units'] = int(layer.units)
print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer.units}")
except Exception:
layer_info['units'] = str(getattr(layer, 'units', None))
print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
else:
print(f" Layer {i+1}: {layer.__class__.__name__}")
# Activation
if hasattr(layer, 'activation'):
try:
layer_info['activation'] = layer.activation.__name__
print(f" activation: {layer_info['activation']}")
except Exception:
layer_info['activation'] = str(getattr(layer, 'activation', None))
print(f" activation: {layer_info['activation']}")
# Dropout rate
if hasattr(layer, 'rate'):
try:
layer_info['dropout_rate'] = float(layer.rate)
print(f" dropout_rate: {layer_info['dropout_rate']}")
except Exception:
layer_info['dropout_rate'] = str(getattr(layer, 'rate', None))
print(f" dropout_rate: {layer_info['dropout_rate']}")
# return_sequences
if hasattr(layer, 'return_sequences'):
try:
layer_info['return_sequences'] = bool(layer.return_sequences)
print(f" return_sequences: {layer_info['return_sequences']}")
except Exception:
layer_info['return_sequences'] = str(getattr(layer, 'return_sequences', None))
print(f" return_sequences: {layer_info['return_sequences']}")
# input/output shapes (if available)
try:
layer_info['input_shape'] = layer.input_shape
layer_info['output_shape'] = layer.output_shape
except Exception:
layer_info['input_shape'] = None
layer_info['output_shape'] = None
layers_report.append(layer_info)
report['layers'] = layers_report
# Training summary
training_summary = {}
training_summary['lookback'] = int(lookback) if 'lookback' in globals() else None
training_summary['epochs_trained'] = len(history.history['loss'])
training_summary['final_training_loss'] = float(history.history['loss'][-1])
training_summary['final_validation_loss'] = float(history.history['val_loss'][-1]) if history.history.get('val_loss') else None
# try to capture MAE if present
training_summary['final_training_mae'] = float(history.history.get('mae')[-1]) if history.history.get('mae') else None
training_summary['final_validation_mae'] = float(history.history.get('val_mae')[-1]) if history.history.get('val_mae') else None
print("\nTraining Summary:")
print(f" Lookback period: {training_summary['lookback']} weeks")
print(f" Epochs Trained: {training_summary['epochs_trained']}")
print(f" Final Training Loss: {training_summary['final_training_loss']:.6f}")
if training_summary['final_validation_loss'] is not None:
print(f" Final Validation Loss: {training_summary['final_validation_loss']:.6f}")
if training_summary['final_training_mae'] is not None:
print(f" Final Training MAE: {training_summary['final_training_mae']:.6f}")
if training_summary['final_validation_mae'] is not None:
print(f" Final Validation MAE: {training_summary['final_validation_mae']:.6f}")
report['training_summary'] = training_summary
# Evaluation metrics (from earlier)
try:
report['evaluation_metrics'] = metrics
print("\nEvaluation Metrics:")
for k, v in metrics.items():
print(f" {k}: {v}")
except Exception as e:
report['evaluation_metrics'] = None
print(f"Could not attach evaluation metrics: {e}")
# Residual stats
try:
residuals_stats = {
'residual_mean': float(np.mean(residuals)),
'residual_std': float(np.std(residuals)),
'residual_min': float(np.min(residuals)),
'residual_max': float(np.max(residuals))
}
report['residuals'] = residuals_stats
print("\nResiduals:")
print(f" Mean: {residuals_stats['residual_mean']:.6f}")
print(f" Std : {residuals_stats['residual_std']:.6f}")
print(f" Min : {residuals_stats['residual_min']:.6f}")
print(f" Max : {residuals_stats['residual_max']:.6f}")
except Exception as e:
print(f"Could not compute residual stats: {e}")
# Timestamp & environment info
import datetime, platform
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__
# Save JSON report
report_filename = "fourier_gru_report.json"
try:
with open(report_filename, 'w', encoding='utf-8') as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nSaved detailed report to: {report_filename}")
except Exception as e:
print(f"Failed to save JSON report: {e}")
print("\n" + "="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================
FOURIER + GRU MODEL CONFIGURATION & TRAINING REPORT
============================================================
Optimizer: Adam
Learning Rate: 0.0034904100466519594
Best Hyperparameters (from tuner):
num_layers: 1
units_0: 160
dropout_0: 0.5
dense_layers: 1
learning_rate: 0.00349041012762722
units_1: 192
dropout_1: 0.4
units_2: 32
dropout_2: 0.4
dense_units_0: 112
dense_dropout_0: 0.5
dense_units_1: 16
dense_dropout_1: 0.4
Model Layers:
Layer 1: GRU - units: 160
activation: tanh
return_sequences: False
Layer 2: Dropout
dropout_rate: 0.5
Layer 3: Dense - units: 112
activation: relu
Layer 4: Dropout
dropout_rate: 0.5
Layer 5: Dense - units: 1
activation: linear
Training Summary:
Lookback period: 52 weeks
Epochs Trained: 26
Final Training Loss: 0.007103
Final Validation Loss: 0.015748
Final Training MAE: 0.049101
Final Validation MAE: 0.118491
Evaluation Metrics:
MSE: 475207.6184
RMSE: 689.3530
MAE: 628.8056
MAPE: 37.27%
R²: -1.7013
Directional Accuracy: 19.00%
Residuals:
Mean: 628.805635
Std : 282.508569
Min : 59.539413
Max : 1762.257271
Saved detailed report to: fourier_gru_report.json
============================================================
REPORT COMPLETE
============================================================
In [29]:
# Plot 3: Forecast Results
# --- Step 9: Visualization ---
plt.figure(figsize=(12, 6))
test_dates = prices.index[lookback+split2:lookback+split2+len(actual_values)]
plt.plot(test_dates, actual_values, label='Actual Test', color='green', linewidth=2)
plt.plot(test_dates, forecast_mean, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Cardamom Price Forecasting with Fourier GRU') # Changed to GRU
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/flgr_result4.png", dpi=300, bbox_inches='tight')
plt.show()
Wavelet + ARIMA¶
Import Libraries for Wavelet + ARIMA¶
In [33]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA # 👈 ADD THIS
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from itertools import product
from sklearn.preprocessing import MinMaxScaler
# Suppress warnings
warnings.filterwarnings("ignore")
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
# Perform DWT decomposition with db4 at level 3
wavelet = 'db4'
level = 3
coeffs = pywt.wavedec(data, wavelet, level=level)
# Denoise by keeping only the approximation coefficients (remove high-frequency details)
denoised_coeffs = [coeffs[0]] + [np.zeros_like(c) for c in coeffs[1:]]
denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)] # Ensure same length
# Alternatively, you can keep some of the detail coefficients if they contain meaningful information
# For example, keep the first level detail:
# denoised_coeffs = [coeffs[0], coeffs[1]] + [np.zeros_like(c) for c in coeffs[2:]]
# denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]
# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original')
plt.plot(df.index, denoised_data, label='Denoised', linestyle='--')
plt.title("Original vs Denoised Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/warima_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Data splitting
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
# ARIMA Hyperparameter tuning
param_grid = {
'order': [(0,1,1), (1,1,1), (2,1,1), (0,1,0), (1,1,0), (2,0,0), (3,0,1),(0,1,1), (1,1,1), (2,1,1), (0,1,0), (1,1,0), (2,0,0), (3,0,1),(4,1,0),(2,0,1),(2,0,2),(3,0,7),(0,0,1),(0,1,2)]
}
best_score = float('inf')
best_params = None
best_model = None
print("Training ARIMA on denoised data...")
for order in param_grid['order']:
try:
model = ARIMA(denoised_data[:train_size], order=order)
model_fit = model.fit()
val_pred = model_fit.forecast(steps=val_size)
score = mean_squared_error(
denoised_data[train_size:train_size+val_size], val_pred
)
if score < best_score:
best_score = score
best_params = {'order': order}
best_model = model_fit
except Exception as e:
print(f"Failed for {order}: {e}")
continue
if best_params is None:
raise ValueError("No ARIMA model could be fitted. Try expanding param_grid.")
print(f"Best params: {best_params} with MSE: {best_score:.4f}")
# Final model
final_model = ARIMA(
denoised_data[:train_size+val_size],
order=best_params['order']
)
final_model_fit = final_model.fit()
# Make predictions
test_predictions = final_model_fit.forecast(steps=test_size)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Evaluation metrics
y_true = data[train_size+val_size:]
y_pred = test_predictions
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true, y_pred)
mape = mean_absolute_percentage_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)
# Calculate TSS, RSS, ESS
y_mean = np.mean(y_true)
tss = np.sum((y_true - y_mean) ** 2)
rss = np.sum((y_true - y_pred) ** 2)
ess = tss - rss # Explained Sum of Squares
# Directional Accuracy (DA)
true_diff = np.diff(y_true)
pred_diff = np.diff(y_pred)
da = np.mean(np.sign(true_diff) == np.sign(pred_diff)) * 100
print("\nFinal Evaluation Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.4f}%")
print(f"TSS: {tss:.4f}")
print(f"RSS: {rss:.4f}")
print(f"ESS: {ess:.4f}")
print(f"R²: {r2:.4f}")
print(f"Directional Accuracy (DA): {da:.2f}%")
# Create a DataFrame for comparison
results_df = pd.DataFrame({
"Date": df.index[train_size+val_size:],
"Actual": y_true,
"Predicted": y_pred
})
print("\nActual vs Predicted Values:")
print(results_df.head(20)) # Show first 20 rows (change as needed)
# Plot actual vs predicted
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], y_true, label='Actual')
plt.plot(df.index[train_size+val_size:], y_pred, label='Predicted', linestyle='--')
plt.title("Wavelet-SARIMA: Actual vs Predicted Prices")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/warima_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Model diagnostics
plt.figure(figsize=(12, 6))
print("\nSARIMA Model Summary:")
print(final_model_fit.summary())
final_model_fit.plot_diagnostics(figsize=(12, 8))
plt.suptitle("Model Diagnostics", y=1.02)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/warima_result3.png", dpi=300, bbox_inches='tight')
plt.show()
Training ARIMA on denoised data...
Best params: {'order': (2, 0, 2)} with MSE: 19541.3318
Final Evaluation Metrics:
MSE: 600053.5119
RMSE: 774.6312
MAE: 653.1152
MAPE: 0.3622%
TSS: 21666634.8624
RSS: 65405832.7919
ESS: -43739197.9295
R²: -2.0187
Directional Accuracy (DA): 24.07%
Actual vs Predicted Values:
Date Actual Predicted
0 2022-10-02 975.0 926.366548
1 2022-10-09 975.0 923.112194
2 2022-10-16 850.0 920.442949
3 2022-10-23 900.0 918.283332
4 2022-10-30 900.0 916.567174
5 2022-11-06 900.0 915.236476
6 2022-11-13 900.0 914.240411
7 2022-11-20 900.0 913.534442
8 2022-11-27 900.0 913.079556
9 2022-12-04 850.0 912.841581
10 2022-12-11 850.0 912.790599
11 2022-12-18 875.0 912.900420
12 2022-12-25 875.0 913.148127
13 2023-01-01 875.0 913.513671
14 2023-01-08 820.0 913.979524
15 2023-01-15 925.0 914.530365
16 2023-01-22 825.0 915.152813
17 2023-01-29 950.0 915.835183
18 2023-02-05 950.0 916.567282
19 2023-02-12 950.0 917.340223
SARIMA Model Summary:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 613
Model: ARIMA(2, 0, 2) Log Likelihood -2067.804
Date: Thu, 06 Nov 2025 AIC 4147.608
Time: 10:55:45 BIC 4174.119
Sample: 0 HQIC 4157.918
- 613
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 972.3970 320.186 3.037 0.002 344.844 1599.950
ar.L1 1.8567 0.013 143.524 0.000 1.831 1.882
ar.L2 -0.8592 0.013 -65.286 0.000 -0.885 -0.833
ma.L1 0.9831 0.520 1.892 0.059 -0.035 2.002
ma.L2 -0.0168 0.037 -0.455 0.649 -0.089 0.055
sigma2 48.4313 25.221 1.920 0.055 -1.001 97.863
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 6959.44
Prob(Q): 0.98 Prob(JB): 0.00
Heteroskedasticity (H): 9.98 Skew: 1.32
Prob(H) (two-sided): 0.00 Kurtosis: 19.29
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<Figure size 1200x600 with 0 Axes>
In [ ]:
--- Search for SARIMA Parameters (Grid Search + Time Series CV) ---¶
Fit SARIMA¶
In [35]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from itertools import product
from sklearn.preprocessing import MinMaxScaler
# Suppress warnings
warnings.filterwarnings("ignore")
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
# Perform DWT decomposition with db4 at level 3
wavelet = 'db4'
level = 3
coeffs = pywt.wavedec(data, wavelet, level=level)
# Denoise by keeping only the approximation coefficients (remove high-frequency details)
denoised_coeffs = [coeffs[0]] + [np.zeros_like(c) for c in coeffs[1:]]
denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)] # Ensure same length
# Alternatively, you can keep some of the detail coefficients if they contain meaningful information
# For example, keep the first level detail:
# denoised_coeffs = [coeffs[0], coeffs[1]] + [np.zeros_like(c) for c in coeffs[2:]]
# denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]
# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original')
plt.plot(df.index, denoised_data, label='Denoised', linestyle='--')
plt.title("Original vs Denoised Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wsarima_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Data splitting
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
# SARIMA Hyperparameter tuning (simplified)
seasonal_period = 26 # Adjust based on your data's seasonality
# Reduced parameter grid for faster training
param_grid = {
'order': [ (0,1,0)], # Common ARIMA orders
'seasonal_order': [ (1,1,1,seasonal_period)] # Common seasonal orders
}
best_score = float('inf')
best_params = None
best_model = None
print("Training SARIMA on denoised data...")
for order, seasonal_order in product(param_grid['order'], param_grid['seasonal_order']):
try:
model = SARIMAX(denoised_data[:train_size],
order=order,
seasonal_order=seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False)
model_fit = model.fit(disp=False)
val_pred = model_fit.forecast(steps=val_size)
score = mean_squared_error(denoised_data[train_size:train_size+val_size], val_pred)
if score < best_score:
best_score = score
best_params = {'order': order, 'seasonal_order': seasonal_order}
best_model = model_fit
except Exception as e:
continue
print(f"Best params: {best_params} with MSE: {best_score:.4f}")
# Final model training on train+validation
final_model = SARIMAX(denoised_data[:train_size+val_size],
order=best_params['order'],
seasonal_order=best_params['seasonal_order'],
enforce_stationarity=False,
enforce_invertibility=False)
final_model_fit = final_model.fit(disp=False)
# Make predictions
test_predictions = final_model_fit.forecast(steps=test_size)
# ... your existing code for making predictions ...
# Extract the actual test values and the predictions
y_actual = data[train_size+val_size:]
y_pred = test_predictions
# Ensure they are the same length (a good practice)
min_length = min(len(y_actual), len(y_pred))
y_actual = y_actual[:min_length]
y_pred = y_pred[:min_length]
# Standard Regression Metrics
mse = mean_squared_error(y_actual, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_actual, y_pred)
mape = mean_absolute_percentage_error(y_actual, y_pred)
r2 = r2_score(y_actual, y_pred)
# Sum of Squares Calculations
mean_actual = np.mean(y_actual)
tss = np.sum((y_actual - mean_actual) ** 2) # Total Sum of Squares
rss = np.sum((y_actual - y_pred) ** 2) # Residual Sum of Squares
ess = np.sum((y_pred - mean_actual) ** 2) # Explained Sum of Squares
# Note: TSS should equal RSS + ESS (within floating-point precision)
# Directional Accuracy (DA) Calculation
# First, calculate the actual change and the predicted change
actual_changes = np.diff(y_actual) # e.g., [y1-y0, y2-y1, ...]
predicted_changes = np.diff(y_pred) # e.g., [y_hat1-y_hat0, y_hat2-y_hat1, ...]
# Check if the direction (sign) of the change is the same
correct_direction = np.sign(actual_changes) == np.sign(predicted_changes)
# Calculate the percentage of correct directions
# We use the first value of the differences, so the number of predictions is len(actual_changes)
da = (np.sum(correct_direction) / len(actual_changes)) * 100
print("\nFinal Evaluation Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.4f}%")
print(f"TSS: {tss:.4f}")
print(f"RSS: {rss:.4f}")
print(f"ESS: {ess:.4f}")
print(f"R²: {r2:.4f}")
print(f"Directional Accuracy (DA): {da:.2f}%")
# Plot actual vs predicted
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], data[train_size+val_size:], label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', linestyle='--')
plt.title("Wavelet-SARIMA: Actual vs Predicted Prices")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wsarima_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Model diagnostics
print("\nSARIMA Model Summary:")
print(final_model_fit.summary())
final_model_fit.plot_diagnostics(figsize=(12, 8))
plt.suptitle("Model Diagnostics", y=1.02)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wsarima_result3.png", dpi=300, bbox_inches='tight')
plt.show()
Training SARIMA on denoised data...
Best params: {'order': (0, 1, 0), 'seasonal_order': (1, 1, 1, 26)} with MSE: 584371.4103
Final Evaluation Metrics:
MSE: 644062.8720
RMSE: 802.5353
MAE: 677.0230
MAPE: 0.3750%
TSS: 21666634.8624
RSS: 70202853.0449
ESS: 48901362.6727
R²: -2.2401
Directional Accuracy (DA): 26.85%
SARIMA Model Summary:
SARIMAX Results
============================================================================================
Dep. Variable: y No. Observations: 613
Model: SARIMAX(0, 1, 0)x(1, 1, [1], 26) Log Likelihood -2655.903
Date: Thu, 06 Nov 2025 AIC 5317.806
Time: 10:56:56 BIC 5330.784
Sample: 0 HQIC 5322.874
- 613
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.S.L26 0.0384 0.026 1.453 0.146 -0.013 0.090
ma.S.L26 -1.0000 72.287 -0.014 0.989 -142.679 140.679
sigma2 689.6068 4.99e+04 0.014 0.989 -9.71e+04 9.84e+04
===================================================================================
Ljung-Box (L1) (Q): 481.69 Jarque-Bera (JB): 1714.59
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 15.92 Skew: 0.71
Prob(H) (two-sided): 0.00 Kurtosis: 11.46
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Wavelet +LSTM¶
Import Libraries for Wavelet +LSTM¶
In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf
# Suppress warnings
warnings.filterwarnings("ignore")
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
print(f"Original data length: {len(data)}")
# --- Step 1: Wavelet Decomposition ---
wavelet = 'db4'
level = 3
coeffs = pywt.wavedec(data, wavelet, level=level)
# Keep approximation coefficients and first level details for meaningful information
denoised_coeffs = [coeffs[0], coeffs[1]] + [np.zeros_like(c) for c in coeffs[2:]]
denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]
# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, denoised_data, label='Wavelet Denoised', linewidth=2)
plt.title("Original vs Wavelet-Denoised Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 2: Data Preparation for LSTM ---
# Use denoised data for training
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(denoised_data.reshape(-1, 1))
# Create sequences for LSTM
def create_sequences(data, lookback=52):
X, y = [], []
for i in range(lookback, len(data)):
X.append(data[i-lookback:i])
y.append(data[i])
return np.array(X), np.array(y)
lookback = 52 # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)
# Reshape for LSTM [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")
# --- Step 3: Hyperparameter Tuning ---
def build_model(hp):
model = Sequential()
# Number of LSTM layers
for i in range(hp.Int('num_layers', 1, 3)):
model.add(LSTM(
units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
))
model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
# Dense layers with ReLU activation
for i in range(hp.Int('dense_layers', 0, 2)):
model.add(Dense(
units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
activation='relu' # Using ReLU activation
))
model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
model.add(Dense(1, activation='linear'))
model.compile(
optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
loss='mse',
metrics=['mae']
)
return model
print("\nStarting hyperparameter tuning...")
tuner = RandomSearch(
build_model,
objective='val_loss',
max_trials=15,
executions_per_trial=2,
directory='wavelet_lstm_tuning',
project_name='cardamom_wavelet_lstm'
)
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
tuner.search(
X_train, y_train,
epochs=100,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of LSTM layers: {best_hp.get('num_layers')}")
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
print(f"LSTM layer {i+1} units: {best_hp.get(f'units_{i}')}")
print(f"LSTM layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")
# --- Step 4: Build and Train Final Model ---
final_model = tuner.hypermodel.build(best_hp)
print("\nTraining final model...")
history = final_model.fit(
X_train, y_train,
epochs=200,
batch_size=32,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# --- Step 5: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()
# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
# Get actual values (original scale)
y_actual_original = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
y_actual_denoised = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
# --- Step 6: Evaluation ---
def evaluate_forecast(actual, forecast):
"""Comprehensive forecast evaluation"""
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
mape = mean_absolute_percentage_error(actual, forecast)
r2 = r2_score(actual, forecast)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(forecast))
da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
return {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'MAPE': mape,
'R²': r2,
'Directional Accuracy': da
}
# Evaluate on denoised data
metrics_denoised = evaluate_forecast(y_actual_denoised, y_pred)
# Evaluate on original data
metrics_original = evaluate_forecast(y_actual_original, y_pred)
print("\n" + "="*60)
print("WAVELET-LSTM MODEL TRAINING SUMMARY")
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print(f"Wavelet used: {wavelet} level {level}")
print("\nModel Architecture:")
final_model.summary()
print("\n" + "="*60)
print("EVALUATION ON DENOISED DATA")
print("="*60)
for metric, value in metrics_denoised.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
print("\n" + "="*60)
print("EVALUATION ON ORIGINAL DATA")
print("="*60)
for metric, value in metrics_original.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
# --- Step 7: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Wavelet-LSTM Training History')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original vs Denoised vs Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', alpha=0.7, color='blue')
plt.plot(df.index, denoised_data, label='Wavelet Denoised', color='green', linewidth=2)
plt.plot(test_dates, y_pred, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original vs Wavelet-Denoised vs Forecast')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates,
y_pred - metrics_original['RMSE'],
y_pred + metrics_original['RMSE'],
alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 8: Residual Analysis ---
residuals = y_actual_original - y_pred
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('Residuals Over Time')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result3.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('Residual Distribution')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result4.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result5.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual_original, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result6.png", dpi=300, bbox_inches='tight')
plt.show()
print("\nResidual Analysis:")
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")
# --- Step 9: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
"""Forecast future values"""
forecasts = []
current_sequence = last_sequence.copy()
for _ in range(steps):
prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
forecasts.append(prediction)
# Update sequence
new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
current_sequence = new_sequence
# Inverse transform
forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
return future_dates, forecasts
# Forecast next 12 weeks
try:
last_sequence = scaled_data[-lookback:]
future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)
print("\n" + "="*50)
print("FUTURE FORECAST (NEXT 12 WEEKS)")
print("="*50)
for date, price in zip(future_dates, future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
# Plot future forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
plt.plot(future_dates, future_prices, label='Future Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title('Future Price Forecast (Next 12 Weeks)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result7.png", dpi=300, bbox_inches='tight')
plt.show()
except Exception as e:
print(f"Future forecasting failed: {e}")
C:\Users\marti\AppData\Local\Temp\ipykernel_24668\1254446090.py:12: DeprecationWarning: `import kerastuner` is deprecated, please use `import keras_tuner`. from kerastuner.tuners import RandomSearch
Original data length: 722
Training sequences: (468, 52, 1) Validation sequences: (100, 52, 1) Test sequences: (102, 52, 1) Starting hyperparameter tuning... Reloading Tuner from wavelet_lstm_tuning\cardamom_wavelet_lstm\tuner0.json Best Hyperparameters: Number of LSTM layers: 1 Learning rate: 0.003792091345898107 LSTM layer 1 units: 192 LSTM layer 1 dropout: 0.4 Training final model... Epoch 1/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 5s 65ms/step - loss: 0.0645 - mae: 0.1601 - val_loss: 0.0049 - val_mae: 0.0622 Epoch 2/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0083 - mae: 0.0580 - val_loss: 9.0763e-04 - val_mae: 0.0205 Epoch 3/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0077 - mae: 0.0535 - val_loss: 8.9447e-04 - val_mae: 0.0229 Epoch 4/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0061 - mae: 0.0517 - val_loss: 8.8476e-04 - val_mae: 0.0218 Epoch 5/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0056 - mae: 0.0414 - val_loss: 8.5948e-04 - val_mae: 0.0221 Epoch 6/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0062 - mae: 0.0453 - val_loss: 7.6791e-04 - val_mae: 0.0205 Epoch 7/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0052 - mae: 0.0379 - val_loss: 0.0013 - val_mae: 0.0291 Epoch 8/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0035 - mae: 0.0355 - val_loss: 0.0022 - val_mae: 0.0406 Epoch 9/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0071 - mae: 0.0555 - val_loss: 0.0014 - val_mae: 0.0315 Epoch 10/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0055 - mae: 0.0477 - val_loss: 7.8204e-04 - val_mae: 0.0203 Epoch 11/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0048 - mae: 0.0371 - val_loss: 4.8616e-04 - val_mae: 0.0146 Epoch 12/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0037 - mae: 0.0327 - val_loss: 5.4265e-04 - val_mae: 0.0155 Epoch 13/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0049 - mae: 0.0387 - val_loss: 7.9955e-04 - val_mae: 0.0234 Epoch 14/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0034 - mae: 0.0333 - val_loss: 5.1381e-04 - val_mae: 0.0161 Epoch 15/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0032 - mae: 0.0308 - val_loss: 7.3843e-04 - val_mae: 0.0218 Epoch 16/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0029 - mae: 0.0315 - val_loss: 6.6110e-04 - val_mae: 0.0211 Epoch 17/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0033 - mae: 0.0363 - val_loss: 8.3089e-04 - val_mae: 0.0242 Epoch 18/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0038 - mae: 0.0356 - val_loss: 4.2480e-04 - val_mae: 0.0146 Epoch 19/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0031 - mae: 0.0322 - val_loss: 2.9074e-04 - val_mae: 0.0117 Epoch 20/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0035 - mae: 0.0317 - val_loss: 6.1603e-04 - val_mae: 0.0211 Epoch 21/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0038 - mae: 0.0354 - val_loss: 0.0014 - val_mae: 0.0321 Epoch 22/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0029 - mae: 0.0318 - val_loss: 2.7690e-04 - val_mae: 0.0128 Epoch 23/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0043 - mae: 0.0372 - val_loss: 6.1362e-04 - val_mae: 0.0195 Epoch 24/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0028 - mae: 0.0317 - val_loss: 6.9217e-04 - val_mae: 0.0207 Epoch 25/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0297 - val_loss: 3.3069e-04 - val_mae: 0.0148 Epoch 26/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0022 - mae: 0.0272 - val_loss: 3.3206e-04 - val_mae: 0.0142 Epoch 27/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0288 - val_loss: 2.3746e-04 - val_mae: 0.0111 Epoch 28/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0026 - mae: 0.0283 - val_loss: 2.8762e-04 - val_mae: 0.0123 Epoch 29/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0274 - val_loss: 2.4683e-04 - val_mae: 0.0111 Epoch 30/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0022 - mae: 0.0272 - val_loss: 2.7630e-04 - val_mae: 0.0121 Epoch 31/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0033 - mae: 0.0321 - val_loss: 4.4869e-04 - val_mae: 0.0180 Epoch 32/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0027 - mae: 0.0301 - val_loss: 2.7686e-04 - val_mae: 0.0130 Epoch 33/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0026 - mae: 0.0293 - val_loss: 2.3408e-04 - val_mae: 0.0109 Epoch 34/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0028 - mae: 0.0310 - val_loss: 7.1281e-04 - val_mae: 0.0242 Epoch 35/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0022 - mae: 0.0280 - val_loss: 3.3877e-04 - val_mae: 0.0145 Epoch 36/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0022 - mae: 0.0293 - val_loss: 2.0179e-04 - val_mae: 0.0105 Epoch 37/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0015 - mae: 0.0252 - val_loss: 1.8896e-04 - val_mae: 0.0105 Epoch 38/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0015 - mae: 0.0219 - val_loss: 8.8271e-04 - val_mae: 0.0263 Epoch 39/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0032 - mae: 0.0310 - val_loss: 2.9015e-04 - val_mae: 0.0126 Epoch 40/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0260 - val_loss: 3.6359e-04 - val_mae: 0.0155 Epoch 41/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0022 - mae: 0.0285 - val_loss: 7.0972e-04 - val_mae: 0.0243 Epoch 42/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0031 - mae: 0.0316 - val_loss: 4.9701e-04 - val_mae: 0.0191 Epoch 43/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0277 - val_loss: 4.1972e-04 - val_mae: 0.0168 Epoch 44/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0026 - mae: 0.0302 - val_loss: 0.0010 - val_mae: 0.0280 Epoch 45/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0322 - val_loss: 3.0194e-04 - val_mae: 0.0124 Epoch 46/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0018 - mae: 0.0275 - val_loss: 4.0251e-04 - val_mae: 0.0175 Epoch 47/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0253 - val_loss: 3.9413e-04 - val_mae: 0.0153 Epoch 48/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0277 - val_loss: 2.8753e-04 - val_mae: 0.0138 Epoch 49/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0285 - val_loss: 1.8861e-04 - val_mae: 0.0105 Epoch 50/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0017 - mae: 0.0247 - val_loss: 6.1732e-04 - val_mae: 0.0224 Epoch 51/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0030 - mae: 0.0318 - val_loss: 3.3542e-04 - val_mae: 0.0154 Epoch 52/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0020 - mae: 0.0278 - val_loss: 2.7674e-04 - val_mae: 0.0128 Epoch 53/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0023 - mae: 0.0272 - val_loss: 1.7684e-04 - val_mae: 0.0101 Epoch 54/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0020 - mae: 0.0263 - val_loss: 3.4157e-04 - val_mae: 0.0137 Epoch 55/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0275 - val_loss: 0.0014 - val_mae: 0.0349 Epoch 56/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0027 - mae: 0.0347 - val_loss: 2.0435e-04 - val_mae: 0.0106 Epoch 57/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0026 - mae: 0.0300 - val_loss: 3.0771e-04 - val_mae: 0.0149 Epoch 58/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0030 - mae: 0.0315 - val_loss: 2.6914e-04 - val_mae: 0.0139 Epoch 59/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0267 - val_loss: 3.0085e-04 - val_mae: 0.0134 Epoch 60/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0020 - mae: 0.0275 - val_loss: 1.5226e-04 - val_mae: 0.0098 Epoch 61/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0025 - mae: 0.0310 - val_loss: 1.4427e-04 - val_mae: 0.0085 Epoch 62/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0258 - val_loss: 7.7064e-04 - val_mae: 0.0235 Epoch 63/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0293 - val_loss: 4.6110e-04 - val_mae: 0.0176 Epoch 64/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0015 - mae: 0.0262 - val_loss: 3.5620e-04 - val_mae: 0.0165 Epoch 65/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0254 - val_loss: 0.0010 - val_mae: 0.0295 Epoch 66/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0033 - mae: 0.0333 - val_loss: 0.0015 - val_mae: 0.0356 Epoch 67/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0023 - mae: 0.0338 - val_loss: 4.7742e-04 - val_mae: 0.0187 Epoch 68/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0261 - val_loss: 2.6976e-04 - val_mae: 0.0136 Epoch 69/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0014 - mae: 0.0247 - val_loss: 1.2351e-04 - val_mae: 0.0081 Epoch 70/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0022 - mae: 0.0291 - val_loss: 2.0897e-04 - val_mae: 0.0109 Epoch 71/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0265 - val_loss: 1.5730e-04 - val_mae: 0.0095 Epoch 72/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0018 - mae: 0.0254 - val_loss: 1.5710e-04 - val_mae: 0.0091 Epoch 73/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0016 - mae: 0.0251 - val_loss: 1.0221e-04 - val_mae: 0.0074 Epoch 74/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0263 - val_loss: 1.4163e-04 - val_mae: 0.0093 Epoch 75/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0019 - mae: 0.0266 - val_loss: 1.1923e-04 - val_mae: 0.0080 Epoch 76/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0020 - mae: 0.0273 - val_loss: 1.1192e-04 - val_mae: 0.0084 Epoch 77/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0013 - mae: 0.0252 - val_loss: 0.0013 - val_mae: 0.0323 Epoch 78/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0017 - mae: 0.0274 - val_loss: 7.0122e-04 - val_mae: 0.0217 Epoch 79/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0299 - val_loss: 1.3008e-04 - val_mae: 0.0088 Epoch 80/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0019 - mae: 0.0287 - val_loss: 1.5515e-04 - val_mae: 0.0096 Epoch 81/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0016 - mae: 0.0250 - val_loss: 2.4368e-04 - val_mae: 0.0132 Epoch 82/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0018 - mae: 0.0267 - val_loss: 5.4898e-04 - val_mae: 0.0207 Epoch 83/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0021 - mae: 0.0289 - val_loss: 3.7882e-04 - val_mae: 0.0155 Epoch 84/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0017 - mae: 0.0274 - val_loss: 1.7719e-04 - val_mae: 0.0108 Epoch 85/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0019 - mae: 0.0279 - val_loss: 1.1283e-04 - val_mae: 0.0079 Epoch 86/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0018 - mae: 0.0277 - val_loss: 1.0534e-04 - val_mae: 0.0081 Epoch 87/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 38ms/step - loss: 0.0013 - mae: 0.0241 - val_loss: 2.6526e-04 - val_mae: 0.0125 Epoch 88/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 39ms/step - loss: 0.0013 - mae: 0.0239 - val_loss: 1.5702e-04 - val_mae: 0.0099 4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 93ms/step ============================================================ WAVELET-LSTM MODEL TRAINING SUMMARY ============================================================ Final epochs trained: 88 Best validation loss: 0.0001 Best validation MAE: 0.0074 Lookback period: 52 weeks Wavelet used: db4 level 3 Model Architecture:
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ lstm (LSTM) │ (None, 192) │ 148,992 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout (Dropout) │ (None, 192) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense (Dense) │ (None, 16) │ 3,088 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_1 (Dropout) │ (None, 16) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 1) │ 17 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 456,293 (1.74 MB)
Trainable params: 152,097 (594.13 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 304,196 (1.16 MB)
============================================================ EVALUATION ON DENOISED DATA ============================================================ MSE: 8020.6906 RMSE: 89.5583 MAE: 69.3841 MAPE: 0.04% R²: 0.9505 Directional Accuracy: 83.17% ============================================================ EVALUATION ON ORIGINAL DATA ============================================================ MSE: 19622.4330 RMSE: 140.0801 MAE: 102.9759 MAPE: 0.07% R²: 0.8907 Directional Accuracy: 37.62%
Residual Analysis: Residual mean: 55.2330 Residual std: 128.7313 Residual min: -322.7833 Residual max: 439.5608 ================================================== FUTURE FORECAST (NEXT 12 WEEKS) ================================================== 2024-11-03: 2225.41 2024-11-10: 2106.23 2024-11-17: 1894.65 2024-11-24: 1694.13 2024-12-01: 1568.63 2024-12-08: 1519.14 2024-12-15: 1513.12 2024-12-22: 1516.49 2024-12-29: 1506.74 2025-01-05: 1477.39 2025-01-12: 1433.95 2025-01-19: 1386.47
In [28]:
# --- Step 11: Detailed Model Configuration Report & JSON Export ---
import json
import datetime
import platform
from tensorflow.keras import backend as K
print("\n" + "="*60)
print("WAVELET + LSTM MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)
report = {}
# Optimizer details
try:
opt = final_model.optimizer
opt_name = opt.__class__.__name__
try:
lr_val = float(K.get_value(opt.learning_rate))
except Exception:
opt_cfg = opt.get_config()
lr_val = opt_cfg.get('learning_rate', None)
try:
lr_val = float(lr_val)
except Exception:
pass
report['optimizer'] = opt_name
report['learning_rate'] = lr_val
print(f"Optimizer: {opt_name}")
print(f"Learning Rate: {lr_val}")
except Exception as e:
report['optimizer'] = str(e)
print(f"Could not fetch optimizer details: {e}")
# Hyperparameters (from tuner if available)
try:
report['best_hyperparameters'] = best_hp.values
print("\nBest Hyperparameters (from tuner):")
for k, v in best_hp.values.items():
print(f" {k}: {v}")
except Exception as e:
report['best_hyperparameters'] = None
print(f"No best hyperparameters found: {e}")
# Model layers
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
layer_info = {
'index': i+1,
'class_name': layer.__class__.__name__,
'name': layer.name
}
if hasattr(layer, 'units'):
layer_info['units'] = getattr(layer, 'units', None)
print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
else:
print(f" Layer {i+1}: {layer.__class__.__name__}")
if hasattr(layer, 'activation'):
try:
layer_info['activation'] = layer.activation.__name__
except Exception:
layer_info['activation'] = str(layer.activation)
if hasattr(layer, 'rate'):
layer_info['dropout_rate'] = getattr(layer, 'rate', None)
if hasattr(layer, 'return_sequences'):
layer_info['return_sequences'] = getattr(layer, 'return_sequences', None)
try:
layer_info['input_shape'] = layer.input_shape
layer_info['output_shape'] = layer.output_shape
except:
layer_info['input_shape'] = None
layer_info['output_shape'] = None
layers_report.append(layer_info)
report['layers'] = layers_report
# Training summary
training_summary = {
'lookback': lookback,
'epochs_trained': len(history.history['loss']),
'final_training_loss': float(history.history['loss'][-1]),
'final_validation_loss': float(min(history.history['val_loss'])),
'final_training_mae': float(history.history['mae'][-1]) if 'mae' in history.history else None,
'final_validation_mae': float(min(history.history['val_mae'])) if 'val_mae' in history.history else None,
'wavelet': wavelet,
'wavelet_level': level
}
report['training_summary'] = training_summary
print("\nTraining Summary:")
for k, v in training_summary.items():
print(f" {k}: {v}")
# Evaluation
try:
report['evaluation_metrics'] = {
'denoised': metrics_denoised,
'original': metrics_original
}
print("\nEvaluation Metrics attached.")
except Exception as e:
report['evaluation_metrics'] = None
print(f"Could not attach evaluation metrics: {e}")
# Residual stats
try:
residuals_stats = {
'residual_mean': float(np.mean(residuals)),
'residual_std': float(np.std(residuals)),
'residual_min': float(np.min(residuals)),
'residual_max': float(np.max(residuals))
}
report['residuals'] = residuals_stats
print("\nResiduals Summary attached.")
except Exception as e:
print(f"Residual stats failed: {e}")
# Future forecast (if available)
try:
forecast_report = {
'dates': [str(d) for d in future_dates],
'forecasted_prices': [float(p) for p in future_prices]
}
report['future_forecast'] = forecast_report
print("\nFuture forecast added to report.")
except Exception as e:
report['future_forecast'] = None
print(f"Future forecast not added: {e}")
# Metadata
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__
# Save JSON
report_filename = "wavelet_lstm_report.json"
with open(report_filename, "w", encoding="utf-8") as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nSaved detailed report to: {report_filename}")
print("="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================ WAVELET + LSTM MODEL CONFIGURATION & TRAINING REPORT ============================================================ Optimizer: Adam Learning Rate: 0.0037920912727713585 Best Hyperparameters (from tuner): num_layers: 1 units_0: 192 dropout_0: 0.4 dense_layers: 1 learning_rate: 0.003792091345898107 dense_units_0: 16 dense_dropout_0: 0.1 Model Layers: Layer 1: LSTM - units: 192 Layer 2: Dropout Layer 3: Dense - units: 16 Layer 4: Dropout Layer 5: Dense - units: 1 Training Summary: lookback: 52 epochs_trained: 88 final_training_loss: 0.00203199265524745 final_validation_loss: 0.0001546310231788084 final_training_mae: 0.028295811265707016 final_validation_mae: 0.008852318860590458 wavelet: db4 wavelet_level: 3 Evaluation Metrics attached. Residuals Summary attached. Future forecast added to report. Saved detailed report to: wavelet_lstm_report.json ============================================================ REPORT COMPLETE ============================================================
In [28]:
# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, denoised_data, label='Wavelet Denoised', linewidth=2)
plt.title("Original vs Wavelet-Denoised Time Series")
plt.legend()
plt.grid()
plt.show()
In [3]:
# Plot 3: Separate View - Actual vs Predicted
# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted', color='red', linestyle='--', linewidth=2)
plt.title('Actual vs Predicted (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result17.png", dpi=300, bbox_inches='tight')
plt.show()
Wavelet + GRU¶
Import Libraries¶
In [5]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pywt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout # Changed LSTM to GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf
# Suppress warnings
warnings.filterwarnings("ignore")
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
print(f"Original data length: {len(data)}")
# --- Step 1: Wavelet Decomposition ---
wavelet = 'db4'
level = 3
coeffs = pywt.wavedec(data, wavelet, level=level)
# Keep approximation coefficients and first level details for meaningful information
denoised_coeffs = [coeffs[0], coeffs[1]] + [np.zeros_like(c) for c in coeffs[2:]]
denoised_data = pywt.waverec(denoised_coeffs, wavelet)[:len(data)]
# Plot original vs denoised data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, denoised_data, label='Wavelet Denoised', linewidth=2)
plt.title("Original vs Wavelet-Denoised Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 2: Data Preparation for GRU ---
# Use denoised data for training
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(denoised_data.reshape(-1, 1))
# Create sequences for GRU
def create_sequences(data, lookback=52):
X, y = [], []
for i in range(lookback, len(data)):
X.append(data[i-lookback:i])
y.append(data[i])
return np.array(X), np.array(y)
lookback = 52 # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)
# Reshape for GRU [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")
# --- Step 3: Hyperparameter Tuning for GRU ---
def build_model(hp):
model = Sequential()
# Number of GRU layers (changed from LSTM to GRU)
for i in range(hp.Int('num_layers', 1, 3)):
model.add(GRU( # Changed LSTM to GRU
units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
))
model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
# Dense layers with ReLU activation
for i in range(hp.Int('dense_layers', 0, 2)):
model.add(Dense(
units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
activation='relu'
))
model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
model.add(Dense(1, activation='linear'))
model.compile(
optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
loss='mse',
metrics=['mae']
)
return model
print("\nStarting GRU hyperparameter tuning...")
tuner = RandomSearch(
build_model,
objective='val_loss',
max_trials=15,
executions_per_trial=2,
directory='wavelet_gru_tuning', # Changed directory name
project_name='cardamom_wavelet_gru' # Changed project name
)
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
tuner.search(
X_train, y_train,
epochs=100,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of GRU layers: {best_hp.get('num_layers')}") # Changed from LSTM to GRU
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
print(f"GRU layer {i+1} units: {best_hp.get(f'units_{i}')}") # Changed from LSTM to GRU
print(f"GRU layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}") # Changed from LSTM to GRU
# --- Step 4: Build and Train Final GRU Model ---
final_model = tuner.hypermodel.build(best_hp)
print("\nTraining final GRU model...") # Changed to GRU
history = final_model.fit(
X_train, y_train,
epochs=200,
batch_size=32,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# --- Step 5: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()
# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
# Get actual values (original scale)
y_actual_original = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
y_actual_denoised = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
# --- Step 6: Evaluation ---
def evaluate_forecast(actual, forecast):
"""Comprehensive forecast evaluation"""
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
mape = mean_absolute_percentage_error(actual, forecast)
r2 = r2_score(actual, forecast)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(forecast))
da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
return {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'MAPE': mape,
'R²': r2,
'Directional Accuracy': da
}
# Evaluate on denoised data
metrics_denoised = evaluate_forecast(y_actual_denoised, y_pred)
# Evaluate on original data
metrics_original = evaluate_forecast(y_actual_original, y_pred)
print("\n" + "="*60)
print("WAVELET-GRU MODEL TRAINING SUMMARY") # Changed to GRU
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print(f"Wavelet used: {wavelet} level {level}")
print("\nGRU Model Architecture:") # Changed to GRU
final_model.summary()
print("\n" + "="*60)
print("EVALUATION ON DENOISED DATA")
print("="*60)
for metric, value in metrics_denoised.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
print("\n" + "="*60)
print("EVALUATION ON ORIGINAL DATA")
print("="*60)
for metric, value in metrics_original.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
# --- Step 7: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Wavelet-GRU Training History') # Changed to GRU
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original vs Denoised vs Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', alpha=0.7, color='blue')
plt.plot(df.index, denoised_data, label='Wavelet Denoised', color='green', linewidth=2)
plt.plot(test_dates, y_pred, label='Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original vs Wavelet-Denoised vs Forecast (GRU)') # Added GRU
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2) # Added GRU
plt.fill_between(test_dates,
y_pred - metrics_original['RMSE'],
y_pred + metrics_original['RMSE'],
alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - GRU Model (Test Period)') # Added GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 8: Residual Analysis ---
residuals = y_actual_original - y_pred
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('GRU Model Residuals Over Time') # Added GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result5.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('GRU Residual Distribution') # Added GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result6.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('GRU Residuals vs Predicted') # Added GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result7.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual_original, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('GRU Residuals vs Actual') # Added GRU
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result8.png", dpi=300, bbox_inches='tight')
plt.show()
print("\nGRU Residual Analysis:") # Added GRU
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")
# --- Step 9: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
"""Forecast future values"""
forecasts = []
current_sequence = last_sequence.copy()
for _ in range(steps):
prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
forecasts.append(prediction)
# Update sequence
new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
current_sequence = new_sequence
# Inverse transform
forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
return future_dates, forecasts
# Forecast next 12 weeks
try:
last_sequence = scaled_data[-lookback:]
future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)
print("\n" + "="*50)
print("FUTURE FORECAST - GRU MODEL (NEXT 12 WEEKS)") # Added GRU
print("="*50)
for date, price in zip(future_dates, future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
# Plot future forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
plt.plot(future_dates, future_prices, label='GRU Future Forecast', color='red', linestyle='--', linewidth=2) # Added GRU
plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title('GRU Future Price Forecast (Next 12 Weeks)') # Added GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result9.png", dpi=300, bbox_inches='tight')
plt.show()
except Exception as e:
print(f"Future forecasting failed: {e}")
# --- GRU Benefits Summary ---
print("\n" + "="*60)
print("GRU MODEL ADVANTAGES OVER LSTM")
print("="*60)
print("1. Computational Efficiency: Fewer parameters (2 gates vs LSTM's 3 gates)")
print("2. Faster Training: Less complex architecture leads to faster training times")
print("3. Better Performance: Often performs better on smaller datasets")
print("4. Reduced Overfitting: Simpler architecture can be less prone to overfitting")
print("5. Memory Efficiency: Uses less memory during training and inference")
print("6. Faster Convergence: Typically converges faster than LSTM")
print("7. Better Gradient Flow: Simpler architecture improves gradient propagation")
Original data length: 722
Training sequences: (468, 52, 1) Validation sequences: (100, 52, 1) Test sequences: (102, 52, 1) Starting GRU hyperparameter tuning... Reloading Tuner from wavelet_gru_tuning\cardamom_wavelet_gru\tuner0.json Best Hyperparameters: Number of GRU layers: 1 Learning rate: 0.002051386718289359 GRU layer 1 units: 224 GRU layer 1 dropout: 0.4 Training final GRU model... Epoch 1/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 5s 81ms/step - loss: 0.0406 - mae: 0.1411 - val_loss: 6.6561e-04 - val_mae: 0.0173 Epoch 2/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0042 - mae: 0.0385 - val_loss: 0.0012 - val_mae: 0.0282 Epoch 3/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0039 - mae: 0.0371 - val_loss: 7.1862e-04 - val_mae: 0.0202 Epoch 4/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0032 - mae: 0.0283 - val_loss: 4.0779e-04 - val_mae: 0.0146 Epoch 5/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0032 - mae: 0.0290 - val_loss: 3.9818e-04 - val_mae: 0.0130 Epoch 6/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0028 - mae: 0.0266 - val_loss: 3.1017e-04 - val_mae: 0.0116 Epoch 7/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0020 - mae: 0.0252 - val_loss: 4.3022e-04 - val_mae: 0.0165 Epoch 8/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0017 - mae: 0.0231 - val_loss: 2.4149e-04 - val_mae: 0.0107 Epoch 9/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0016 - mae: 0.0229 - val_loss: 2.0879e-04 - val_mae: 0.0107 Epoch 10/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 0.0014 - mae: 0.0220 - val_loss: 4.7442e-04 - val_mae: 0.0182 Epoch 11/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0015 - mae: 0.0205 - val_loss: 2.3027e-04 - val_mae: 0.0106 Epoch 12/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0011 - mae: 0.0193 - val_loss: 3.3755e-04 - val_mae: 0.0148 Epoch 13/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0011 - mae: 0.0194 - val_loss: 2.1890e-04 - val_mae: 0.0126 Epoch 14/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 9.8697e-04 - mae: 0.0228 - val_loss: 4.0378e-04 - val_mae: 0.0161 Epoch 15/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0014 - mae: 0.0215 - val_loss: 1.5815e-04 - val_mae: 0.0095 Epoch 16/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0010 - mae: 0.0188 - val_loss: 1.5241e-04 - val_mae: 0.0084 Epoch 17/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - loss: 0.0011 - mae: 0.0190 - val_loss: 1.4074e-04 - val_mae: 0.0082 Epoch 18/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0010 - mae: 0.0187 - val_loss: 3.3456e-04 - val_mae: 0.0148 Epoch 19/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 9.8860e-04 - mae: 0.0196 - val_loss: 1.6235e-04 - val_mae: 0.0097 Epoch 20/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0011 - mae: 0.0192 - val_loss: 1.2785e-04 - val_mae: 0.0080 Epoch 21/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.5907e-04 - mae: 0.0182 - val_loss: 2.7821e-04 - val_mae: 0.0136 Epoch 22/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 0.0013 - mae: 0.0221 - val_loss: 2.2980e-04 - val_mae: 0.0119 Epoch 23/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.3321e-04 - mae: 0.0177 - val_loss: 2.2897e-04 - val_mae: 0.0127 Epoch 24/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.6326e-04 - mae: 0.0171 - val_loss: 1.8956e-04 - val_mae: 0.0103 Epoch 25/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.0895e-04 - mae: 0.0169 - val_loss: 3.4315e-04 - val_mae: 0.0157 Epoch 26/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 9.4998e-04 - mae: 0.0189 - val_loss: 1.5390e-04 - val_mae: 0.0087 Epoch 27/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 8.7986e-04 - mae: 0.0175 - val_loss: 1.2559e-04 - val_mae: 0.0081 Epoch 28/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 7.1703e-04 - mae: 0.0157 - val_loss: 1.1656e-04 - val_mae: 0.0074 Epoch 29/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.9170e-04 - mae: 0.0159 - val_loss: 3.6592e-04 - val_mae: 0.0168 Epoch 30/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.6841e-04 - mae: 0.0175 - val_loss: 1.4055e-04 - val_mae: 0.0097 Epoch 31/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0012 - mae: 0.0193 - val_loss: 1.5999e-04 - val_mae: 0.0099 Epoch 32/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.1401e-04 - mae: 0.0175 - val_loss: 1.4100e-04 - val_mae: 0.0098 Epoch 33/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0010 - mae: 0.0189 - val_loss: 2.7553e-04 - val_mae: 0.0145 Epoch 34/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 8.0730e-04 - mae: 0.0183 - val_loss: 1.3814e-04 - val_mae: 0.0083 Epoch 35/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 0.0011 - mae: 0.0213 - val_loss: 1.7513e-04 - val_mae: 0.0113 Epoch 36/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.6740e-04 - mae: 0.0216 - val_loss: 1.3451e-04 - val_mae: 0.0096 Epoch 37/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.4420e-04 - mae: 0.0173 - val_loss: 9.7174e-05 - val_mae: 0.0077 Epoch 38/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.9973e-04 - mae: 0.0190 - val_loss: 1.2269e-04 - val_mae: 0.0092 Epoch 39/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.4614e-04 - mae: 0.0165 - val_loss: 8.1454e-05 - val_mae: 0.0066 Epoch 40/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 7.5813e-04 - mae: 0.0154 - val_loss: 8.6391e-05 - val_mae: 0.0066 Epoch 41/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 8.2527e-04 - mae: 0.0173 - val_loss: 1.5650e-04 - val_mae: 0.0106 Epoch 42/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 7.0442e-04 - mae: 0.0175 - val_loss: 6.6731e-05 - val_mae: 0.0056 Epoch 43/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 6.5386e-04 - mae: 0.0154 - val_loss: 2.5717e-04 - val_mae: 0.0135 Epoch 44/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 6.9037e-04 - mae: 0.0177 - val_loss: 8.4732e-05 - val_mae: 0.0067 Epoch 45/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.3333e-04 - mae: 0.0165 - val_loss: 1.2596e-04 - val_mae: 0.0085 Epoch 46/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 6.2000e-04 - mae: 0.0142 - val_loss: 1.7775e-04 - val_mae: 0.0118 Epoch 47/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 8.0606e-04 - mae: 0.0164 - val_loss: 1.2970e-04 - val_mae: 0.0097 Epoch 48/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 7.4555e-04 - mae: 0.0141 - val_loss: 1.0925e-04 - val_mae: 0.0089 Epoch 49/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 6.3674e-04 - mae: 0.0166 - val_loss: 6.4746e-05 - val_mae: 0.0057 Epoch 50/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.3739e-04 - mae: 0.0175 - val_loss: 8.6348e-05 - val_mae: 0.0071 Epoch 51/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 6.1029e-04 - mae: 0.0151 - val_loss: 1.7950e-04 - val_mae: 0.0117 Epoch 52/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0011 - mae: 0.0197 - val_loss: 3.9751e-04 - val_mae: 0.0182 Epoch 53/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 8.3818e-04 - mae: 0.0205 - val_loss: 5.6425e-05 - val_mae: 0.0052 Epoch 54/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 8.1495e-04 - mae: 0.0180 - val_loss: 7.5006e-05 - val_mae: 0.0070 Epoch 55/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.9230e-04 - mae: 0.0151 - val_loss: 1.7485e-04 - val_mae: 0.0087 Epoch 56/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.1987e-04 - mae: 0.0149 - val_loss: 8.8326e-05 - val_mae: 0.0076 Epoch 57/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 5.3549e-04 - mae: 0.0136 - val_loss: 7.0879e-05 - val_mae: 0.0057 Epoch 58/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 6.2342e-04 - mae: 0.0139 - val_loss: 3.3876e-04 - val_mae: 0.0164 Epoch 59/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.4244e-04 - mae: 0.0198 - val_loss: 2.4646e-04 - val_mae: 0.0141 Epoch 60/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 8.2960e-04 - mae: 0.0169 - val_loss: 1.1223e-04 - val_mae: 0.0095 Epoch 61/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.8341e-04 - mae: 0.0164 - val_loss: 6.6242e-05 - val_mae: 0.0053 Epoch 62/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 6.1215e-04 - mae: 0.0138 - val_loss: 1.6948e-04 - val_mae: 0.0113 Epoch 63/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 8.7228e-04 - mae: 0.0169 - val_loss: 1.9953e-04 - val_mae: 0.0109 Epoch 64/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.2848e-04 - mae: 0.0156 - val_loss: 1.0757e-04 - val_mae: 0.0087 Epoch 65/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 5.9780e-04 - mae: 0.0149 - val_loss: 3.3949e-04 - val_mae: 0.0171 Epoch 66/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 6.5201e-04 - mae: 0.0142 - val_loss: 5.9108e-05 - val_mae: 0.0054 Epoch 67/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 6.9726e-04 - mae: 0.0153 - val_loss: 5.4804e-05 - val_mae: 0.0052 Epoch 68/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 4.8245e-04 - mae: 0.0132 - val_loss: 4.9300e-05 - val_mae: 0.0049 Epoch 69/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - loss: 7.5551e-04 - mae: 0.0158 - val_loss: 7.8496e-05 - val_mae: 0.0071 Epoch 70/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 58ms/step - loss: 5.9315e-04 - mae: 0.0149 - val_loss: 2.4533e-04 - val_mae: 0.0144 Epoch 71/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 55ms/step - loss: 7.0124e-04 - mae: 0.0162 - val_loss: 2.0084e-04 - val_mae: 0.0113 Epoch 72/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 53ms/step - loss: 5.0515e-04 - mae: 0.0129 - val_loss: 6.2962e-05 - val_mae: 0.0056 Epoch 73/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.2517e-04 - mae: 0.0142 - val_loss: 4.6976e-05 - val_mae: 0.0047 Epoch 74/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 6.4673e-04 - mae: 0.0144 - val_loss: 5.9757e-05 - val_mae: 0.0058 Epoch 75/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 6.5002e-04 - mae: 0.0150 - val_loss: 7.1378e-05 - val_mae: 0.0066 Epoch 76/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 9.6939e-04 - mae: 0.0202 - val_loss: 4.3667e-04 - val_mae: 0.0194 Epoch 77/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 8.9345e-04 - mae: 0.0185 - val_loss: 6.8965e-05 - val_mae: 0.0063 Epoch 78/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 5.9977e-04 - mae: 0.0167 - val_loss: 7.0341e-05 - val_mae: 0.0060 Epoch 79/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 5.3396e-04 - mae: 0.0134 - val_loss: 6.6781e-05 - val_mae: 0.0064 Epoch 80/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 7.8462e-04 - mae: 0.0165 - val_loss: 1.0822e-04 - val_mae: 0.0086 Epoch 81/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 5.0874e-04 - mae: 0.0144 - val_loss: 2.3464e-04 - val_mae: 0.0137 Epoch 82/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 6.2562e-04 - mae: 0.0149 - val_loss: 5.8219e-05 - val_mae: 0.0064 Epoch 83/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.5684e-04 - mae: 0.0163 - val_loss: 1.1107e-04 - val_mae: 0.0089 Epoch 84/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 7.2826e-04 - mae: 0.0177 - val_loss: 1.6702e-04 - val_mae: 0.0097 Epoch 85/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - loss: 6.6232e-04 - mae: 0.0156 - val_loss: 6.8336e-05 - val_mae: 0.0054 Epoch 86/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.9771e-04 - mae: 0.0152 - val_loss: 8.6466e-05 - val_mae: 0.0064 Epoch 87/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.5203e-04 - mae: 0.0169 - val_loss: 5.1111e-05 - val_mae: 0.0047 Epoch 88/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 4.6553e-04 - mae: 0.0142 - val_loss: 2.0932e-04 - val_mae: 0.0125 4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 111ms/step ============================================================ WAVELET-GRU MODEL TRAINING SUMMARY ============================================================ Final epochs trained: 88 Best validation loss: 0.0000 Best validation MAE: 0.0047 Lookback period: 52 weeks Wavelet used: db4 level 3 GRU Model Architecture:
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ gru (GRU) │ (None, 224) │ 152,544 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_2 (Dropout) │ (None, 224) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_2 (Dense) │ (None, 1) │ 225 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 458,309 (1.75 MB)
Trainable params: 152,769 (596.75 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 305,540 (1.17 MB)
============================================================ EVALUATION ON DENOISED DATA ============================================================ MSE: 4140.1010 RMSE: 64.3436 MAE: 48.9869 MAPE: 0.03% R²: 0.9744 Directional Accuracy: 83.17% ============================================================ EVALUATION ON ORIGINAL DATA ============================================================ MSE: 16242.7976 RMSE: 127.4472 MAE: 93.1135 MAPE: 0.06% R²: 0.9095 Directional Accuracy: 38.61%
GRU Residual Analysis: Residual mean: 32.9018 Residual std: 123.1271 Residual min: -388.7794 Residual max: 434.9351 ================================================== FUTURE FORECAST - GRU MODEL (NEXT 12 WEEKS) ================================================== 2024-11-03: 2306.12 2024-11-10: 2252.24 2024-11-17: 2101.93 2024-11-24: 1919.64 2024-12-01: 1756.78 2024-12-08: 1656.57 2024-12-15: 1622.91 2024-12-22: 1627.00 2024-12-29: 1631.12 2025-01-05: 1611.09 2025-01-12: 1563.73 2025-01-19: 1501.98
============================================================ GRU MODEL ADVANTAGES OVER LSTM ============================================================ 1. Computational Efficiency: Fewer parameters (2 gates vs LSTM's 3 gates) 2. Faster Training: Less complex architecture leads to faster training times 3. Better Performance: Often performs better on smaller datasets 4. Reduced Overfitting: Simpler architecture can be less prone to overfitting 5. Memory Efficiency: Uses less memory during training and inference 6. Faster Convergence: Typically converges faster than LSTM 7. Better Gradient Flow: Simpler architecture improves gradient propagation
In [32]:
# --- Step 10: Detailed Model Configuration Report & JSON Export ---
import json
import datetime
import platform
from tensorflow.keras import backend as K
print("\n" + "="*60)
print("WAVELET + GRU MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)
report = {}
# Optimizer details
try:
opt = final_model.optimizer
opt_name = opt.__class__.__name__
try:
lr_val = float(K.get_value(opt.learning_rate))
except Exception:
opt_cfg = opt.get_config()
lr_val = opt_cfg.get('learning_rate', None)
try:
lr_val = float(lr_val)
except Exception:
pass
report['optimizer'] = opt_name
report['learning_rate'] = lr_val
print(f"Optimizer: {opt_name}")
print(f"Learning Rate: {lr_val}")
except Exception as e:
report['optimizer'] = str(e)
print(f"Could not fetch optimizer details: {e}")
# Hyperparameters (from tuner if available)
try:
report['best_hyperparameters'] = best_hp.values
print("\nBest Hyperparameters (from tuner):")
for k, v in best_hp.values.items():
print(f" {k}: {v}")
except Exception as e:
report['best_hyperparameters'] = None
print(f"No best hyperparameters found: {e}")
# Model layers
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
layer_info = {
'index': i+1,
'class_name': layer.__class__.__name__,
'name': layer.name
}
if hasattr(layer, 'units'):
layer_info['units'] = getattr(layer, 'units', None)
print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
else:
print(f" Layer {i+1}: {layer.__class__.__name__}")
if hasattr(layer, 'activation'):
try:
layer_info['activation'] = layer.activation.__name__
except Exception:
layer_info['activation'] = str(layer.activation)
if hasattr(layer, 'rate'):
layer_info['dropout_rate'] = getattr(layer, 'rate', None)
if hasattr(layer, 'return_sequences'):
layer_info['return_sequences'] = getattr(layer, 'return_sequences', None)
try:
layer_info['input_shape'] = layer.input_shape
layer_info['output_shape'] = layer.output_shape
except:
layer_info['input_shape'] = None
layer_info['output_shape'] = None
layers_report.append(layer_info)
report['layers'] = layers_report
# Training summary
training_summary = {
'lookback': lookback,
'epochs_trained': len(history.history['loss']),
'final_training_loss': float(history.history['loss'][-1]),
'final_validation_loss': float(min(history.history['val_loss'])),
'final_training_mae': float(history.history['mae'][-1]) if 'mae' in history.history else None,
'final_validation_mae': float(min(history.history['val_mae'])) if 'val_mae' in history.history else None,
'wavelet': wavelet,
'wavelet_level': level
}
report['training_summary'] = training_summary
print("\nTraining Summary:")
for k, v in training_summary.items():
print(f" {k}: {v}")
# Evaluation
try:
report['evaluation_metrics'] = {
'denoised': metrics_denoised,
'original': metrics_original
}
print("\nEvaluation Metrics attached.")
except Exception as e:
report['evaluation_metrics'] = None
print(f"Could not attach evaluation metrics: {e}")
# Residual stats
try:
residuals_stats = {
'residual_mean': float(np.mean(residuals)),
'residual_std': float(np.std(residuals)),
'residual_min': float(np.min(residuals)),
'residual_max': float(np.max(residuals))
}
report['residuals'] = residuals_stats
print("\nResiduals Summary attached.")
except Exception as e:
print(f"Residual stats failed: {e}")
# Future forecast (if available)
try:
forecast_report = {
'dates': [str(d) for d in future_dates],
'forecasted_prices': [float(p) for p in future_prices]
}
report['future_forecast'] = forecast_report
print("\nFuture forecast added to report.")
except Exception as e:
report['future_forecast'] = None
print(f"Future forecast not added: {e}")
# Metadata
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__
# Save JSON
report_filename = "wavelet_gru_report.json"
with open(report_filename, "w", encoding="utf-8") as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nSaved detailed report to: {report_filename}")
print("="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================ WAVELET + GRU MODEL CONFIGURATION & TRAINING REPORT ============================================================ Optimizer: Adam Learning Rate: 0.0020513867493718863 Best Hyperparameters (from tuner): num_layers: 1 units_0: 224 dropout_0: 0.4 dense_layers: 0 learning_rate: 0.002051386718289359 units_1: 96 dropout_1: 0.4 units_2: 160 dropout_2: 0.30000000000000004 dense_units_0: 112 dense_dropout_0: 0.2 dense_units_1: 112 dense_dropout_1: 0.2 Model Layers: Layer 1: GRU - units: 224 Layer 2: Dropout Layer 3: Dense - units: 1 Training Summary: lookback: 52 epochs_trained: 82 final_training_loss: 0.0003886503691319376 final_validation_loss: 4.182316843071021e-05 final_training_mae: 0.012498756870627403 final_validation_mae: 0.004680353216826916 wavelet: db4 wavelet_level: 3 Evaluation Metrics attached. Residuals Summary attached. Future forecast added to report. Saved detailed report to: wavelet_gru_report.json ============================================================ REPORT COMPLETE ============================================================
In [33]:
# --- Step 9: Detailed Model Configuration Report ---
print("\n" + "="*60)
print("GRU MODEL CONFIGURATION & TRAINING DETAILS")
print("="*60)
# Optimizer details
optimizer_config = final_model.optimizer.get_config()
print(f"Optimizer: {final_model.optimizer.__class__.__name__}")
print(f"Learning Rate: {optimizer_config['learning_rate']}")
# Model architecture details
for i, layer in enumerate(final_model.layers):
print(f"\nLayer {i+1}: {layer.__class__.__name__}")
try:
print(f" Units: {layer.units}")
except:
pass
try:
print(f" Activation: {layer.activation.__name__}")
except:
pass
try:
print(f" Dropout Rate: {layer.rate}")
except:
pass
if hasattr(layer, "return_sequences"):
print(f" Return Sequences: {layer.return_sequences}")
# Training summary
print("\nTraining Details:")
print(f"Epochs Trained: {len(history.history['loss'])}")
print(f"Final Training Loss: {history.history['loss'][-1]:.4f}")
print(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
print(f"Final Training MAE: {history.history['mae'][-1]:.4f}")
print(f"Final Validation MAE: {history.history['val_mae'][-1]:.4f}")
print("\n" + "="*60)
print("NOTE: The above configuration includes optimizer, activation functions, "
"learning rate, and automatic layer details for full reproducibility.")
print("="*60)
============================================================ GRU MODEL CONFIGURATION & TRAINING DETAILS ============================================================ Optimizer: Adam Learning Rate: 0.0020513867493718863 Layer 1: GRU Units: 224 Activation: tanh Return Sequences: False Layer 2: Dropout Dropout Rate: 0.4 Layer 3: Dense Units: 1 Activation: linear Training Details: Epochs Trained: 82 Final Training Loss: 0.0004 Final Validation Loss: 0.0001 Final Training MAE: 0.0125 Final Validation MAE: 0.0055 ============================================================ NOTE: The above configuration includes optimizer, activation functions, learning rate, and automatic layer details for full reproducibility. ============================================================
In [7]:
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 8))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2) # Added GRU
plt.title('Actual vs Predicted - GRU Model (Test Period)') # Added GRU
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/wlst_result19.png", dpi=300, bbox_inches='tight')
plt.show()
In [11]:
import pywt
# Assume 'data' is your time series (e.g., prices)
# Perform 3-level Haar wavelet decomposition
coeffs_original = pywt.wavedec(data, 'haar', level=3) # data should be your time series
labels = ['a3', 'd3', 'd2', 'd1']
plt.figure(figsize=(12, 8))
for i, c in enumerate(coeffs_original):
plt.subplot(len(coeffs_original), 1, i + 1)
plt.plot(c, label=labels[i])
plt.legend(loc='upper right')
plt.grid(True)
plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/wlst_result119.png", dpi=300, bbox_inches='tight')
plt.show()
Empirical Mode Decomposition¶
EMD + ARIMA¶
In [3]:
import warnings
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # Import matplotlib.pyplot
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
warnings.filterwarnings("ignore")
In [142]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [143]:
df.head()
Out[143]:
| State Name | District Name | Market Name | Variety | Group | Arrivals (Tonnes) | Min Price (Rs./Quintal) | Max Price (Rs./Quintal) | Modal Price (Rs./Quintal) | Date | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 13.0 | 1500 | 1700 | 1650 | 2010-06-08 |
| 1 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 11.0 | 1300 | 1750 | 1500 | 2010-06-13 |
| 2 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 16.5 | 1400 | 1800 | 1600 | 2010-06-20 |
| 3 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 16.5 | 1300 | 1800 | 1650 | 2010-06-27 |
| 4 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 16.5 | 1400 | 1850 | 1600 | 2010-07-11 |
In [144]:
df[" Date"] = pd.to_datetime(df[" Date"])
df.set_index(" Date", inplace=True)
In [145]:
df['Modal Price (Rs./kg)']=df['Modal Price (Rs./Quintal)']/100
In [148]:
data = df['Modal Price (Rs./kg)'].values
In [149]:
df.head()
Out[149]:
| State Name | District Name | Market Name | Variety | Group | Arrivals (Tonnes) | Min Price (Rs./Quintal) | Max Price (Rs./Quintal) | Modal Price (Rs./Quintal) | Modal Price (Rs./kg) | |
|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||
| 2010-06-08 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 13.0 | 1500 | 1700 | 1650 | 16.5 |
| 2010-06-13 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 11.0 | 1300 | 1750 | 1500 | 15.0 |
| 2010-06-20 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 16.5 | 1400 | 1800 | 1600 | 16.0 |
| 2010-06-27 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 16.5 | 1300 | 1800 | 1650 | 16.5 |
| 2010-07-11 | Kerala | Idukki | Nedumkandam | Green Medium | Spices | 16.5 | 1400 | 1850 | 1600 | 16.0 |
Plot original data¶
In [151]:
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Cardamom Price') # Specify x and y data
plt.title('Weekly Cardamom Price Time Series')
plt.xlabel('Date')
plt.ylabel('price')
plt.legend()
plt.grid()
plt.show()
=== Step 2: Apply EMD ===¶
In [156]:
from PyEMD import EMD
from PyEMD import EMD
emd = EMD()
imfs = emd.emd(data)
selected_imfs = imfs[3:6]
=== Step 3: Visualize IMFs ===¶
In [160]:
plt.figure(figsize=(16, 20))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r')
plt.title("Original Time Series")
plt.grid()
for i, imf in enumerate(imfs):
plt.subplot(len(imfs)+1, 1, i+2)
plt.plot(df.index, imf, 'g')
plt.title(f"IMF {i+1}")
plt.grid()
plt.tight_layout()
plt.show()
=== Step 4: Train/Val/Test Split ===¶
In [163]:
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
train_val_data = data[:train_size + val_size]
test_data = data[train_size + val_size:]
In [164]:
# Plot the data split without inverse transformation
plt.figure(figsize=(12, 5))
plt.plot(np.arange(total_size), data, label='Full Data')
plt.axvspan(0, train_end, color='green', alpha=0.2, label='Train')
plt.axvspan(train_end, val_end, color='orange', alpha=0.2, label='Validation')
plt.axvspan(val_end, total_size, color='red', alpha=0.2, label='Test')
plt.title("Train, Validation, and Test Splits")
plt.xlabel("Time Steps")
plt.ylabel("Price (Rs./Quintal)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
=== Step 5: Time Series CV + ARIMA per IMF ===¶
In [167]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import numpy as np
In [176]:
# Define ARIMA hyperparameter ranges
p_values = [0, 1, 2,3,4,5,6]
d_values = [0]
q_values = [0, 1, 2,3,4]
# Now define the parameter grid
param_grid = {
'p': p_values,
'd': d_values,
'q': q_values
}
In [178]:
imf_predictions = []
best_params_summary = []
In [180]:
selected_imfs = imfs[3:6]
Tuning ARIMA for each selected IMF¶
In [ ]:
for imf_index, imf in enumerate(selected_imfs, start=3):
print(f"🔍 Tuning ARIMA for IMF {imf_index}")
best_score = float('inf')
best_params = None
best_forecast = None
tscv = TimeSeriesSplit(n_splits=3)
for params in ParameterGrid(param_grid):
fold_losses = []
for train_idx, val_idx in tscv.split(imf):
train_series = imf[train_idx]
val_series = imf[val_idx]
try:
model = SARIMAX(
train_series,
order=(params['p'], params['d'], params['q']),
enforce_stationarity=False,
enforce_invertibility=False
)
model_fit = model.fit(disp=False)
val_forecast = model_fit.forecast(steps=len(val_series))
score = mean_squared_error(val_series, val_forecast)
fold_losses.append(score)
except:
continue # skip bad models
if fold_losses:
avg_loss = np.mean(fold_losses)
if avg_loss < best_score:
best_score = avg_loss
best_params = params
try:
full_model = SARIMAX(
imf,
order=(params['p'], params['d'], params['q']),
enforce_stationarity=False,
enforce_invertibility=False
)
full_model_fit = full_model.fit(disp=False)
forecast = full_model_fit.forecast(steps=test_size) # test_size must be defined
best_forecast = forecast
except:
continue
print(f"✅ Best ARIMA params for IMF {imf_index}: {best_params} with MSE={best_score:.4f}")
best_params_summary.append({
"IMF": imf_index,
"p": best_params['p'],
"d": best_params['d'],
"q": best_params['q'],
"Validation MSE": best_score
})
imf_predictions.append(best_forecast)
🔍 Tuning ARIMA for IMF 3
Final refitting on train+val and forecasting on test using best ARIMA parameters¶
In [ ]:
full_model_summaries = []
for imf, summary in zip(selected_imfs, best_params_summary):
imf_index = summary["IMF"]
params = summary
# Separate train and test
train_data = imf[:train_size + val_size]
test_data = imf[train_size + val_size:]
model = SARIMAX(
train_data,
order=(params['p'], params['d'], params['q']),
enforce_stationarity=False,
enforce_invertibility=False
)
model_fit = model.fit(disp=False)
forecast = model_fit.forecast(steps=len(test_data))
full_model_summaries.append({
"IMF": imf_index,
"Test MSE": mean_squared_error(test_data, forecast)
})
Combine forecasts from selected IMFs¶
In [ ]:
# Reconstruct forecast
reconstructed_forecast = np.sum(imf_predictions, axis=0)
=== Step 9: Evaluation ===¶
In [ ]:
actual = df['Modal Price (Rs./kg)'].values[train_size + val_size:]
forecast_index = df.index[train_size + val_size:]
In [ ]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
# Ensure arrays are numpy-compatible and flattened
actual = np.asarray(actual).flatten()
reconstructed_forecast = np.asarray(reconstructed_forecast).flatten()
# Evaluation Metrics
final_rmse = np.sqrt(mean_squared_error(actual, reconstructed_forecast))
final_mape = np.mean(np.abs((actual - reconstructed_forecast) / actual)) * 100
final_mae = mean_absolute_error(actual, reconstructed_forecast)
final_r2 = r2_score(actual, reconstructed_forecast)
# Directional Accuracy (DA)
if len(actual) > 1 and len(reconstructed_forecast) > 1:
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(reconstructed_forecast))
if len(actual_diff) == len(forecast_diff):
final_da = np.mean(actual_diff == forecast_diff) * 100
else:
final_da = np.nan
else:
final_da = np.nan
# Print Results
print(f"\n🎯 Final Reconstructed Forecast Evaluation:")
print(f"RMSE: {final_rmse:.2f}")
print(f"MAPE: {final_mape:.2f}%")
print(f"MAE: {final_mae:.2f}")
print(f"R²: {final_r2:.4f}")
print(f"Directional Accuracy: {final_da:.2f}%" if not np.isnan(final_da) else "Directional Accuracy: N/A (insufficient data)")
In [ ]:
=== Step 10: Plotting Forecast vs Actual ===¶
In [159]:
# After your existing code up to the final plot, replace with this:
# Get the actual date indices for each segment
train_dates = df.index[:train_size]
val_dates = df.index[train_size:train_size+val_size]
test_dates = df.index[train_size+val_size:]
plt.figure(figsize=(14, 6))
# Plot all segments with proper date indexing
plt.plot(train_dates, data[:train_size], label="Train", color='green')
plt.plot(val_dates, data[train_size:train_size+val_size], label="Validation", color='orange')
plt.plot(test_dates, data[train_size+val_size:], label="Test (Actual)", color='red')
# Connect the last validation point to first forecast point
plt.plot([val_dates[-1], test_dates[0]],
[data[train_size+val_size-1], reconstructed_forecast[0]],
'--', color='blue', alpha=0.3) # Connection line
# Plot the forecast
plt.plot(test_dates, reconstructed_forecast, label="Forecast (EMD+SARIMA)", linestyle='--', color='blue')
plt.title(f"Cardamom Price Forecast (EMD+SARIMA)\nRMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./Quintal)")
plt.legend()
# Improve x-axis date formatting
plt.gca().xaxis.set_major_locator(plt.MaxNLocator(10)) # Reduce number of x-ticks
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
In [160]:
plt.plot(test_dates, data[train_size+val_size:], label="Test (Actual)", color='red')
# Connect the last validation point to first forecast point
plt.plot([val_dates[-1], test_dates[0]],
[data[train_size+val_size-1], reconstructed_forecast[0]],
'--', color='blue', alpha=0.3) # Connection line
# Plot the forecast
plt.plot(test_dates, reconstructed_forecast, label="Forecast (EMD+SARIMA)", linestyle='--', color='blue')
Out[160]:
[<matplotlib.lines.Line2D at 0x1faae9278f0>]
In [3]:
import warnings
import numpy as np
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from PyEMD import EMD
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import warnings
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # Import matplotlib.pyplot
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
warnings.filterwarnings("ignore")
# Suppress warnings
warnings.filterwarnings("ignore")
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
# EMD Decomposition
emd = EMD()
imfs = emd.emd(data)
# Plot IMFs
plt.figure(figsize=(16, 20))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r')
plt.title("Original Time Series")
plt.grid()
for i, imf in enumerate(imfs):
plt.subplot(len(imfs)+1, 1, i+2)
plt.plot(df.index, imf, 'g')
plt.title(f"IMF {i+1}")
plt.grid()
plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/eAR_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Data splitting
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
train_data = data[:train_size]
val_data = data[train_size:train_size+val_size]
test_data = data[train_size+val_size:]
# Hyperparameter tuning for ARIMA
param_grid = {
'p': range(0, 7),
'd': range(0,1),
'q': range(0,7)
}
best_arima_params = {}
best_imf_models = {}
# Train ARIMA on each IMF
for i, imf in enumerate(imfs):
print(f"\nTraining ARIMA for IMF {i+1}")
best_score = float('inf')
best_params = None
best_model = None
for params in ParameterGrid(param_grid):
try:
model = ARIMA(imf[:train_size], order=(params['p'], params['d'], params['q']))
model_fit = model.fit()
val_pred = model_fit.forecast(steps=len(val_data))
score = mean_squared_error(imf[train_size:train_size+val_size], val_pred)
if score < best_score:
best_score = score
best_params = params
best_model = model_fit
except:
continue
best_arima_params[f'IMF_{i+1}'] = best_params
best_imf_models[f'IMF_{i+1}'] = best_model
print(f"Best params for IMF {i+1}: {best_params} with MSE: {best_score:.4f}")
# Reconstruct and evaluate on test set
test_predictions = np.zeros(len(test_data))
for i, (imf_name, model) in enumerate(best_imf_models.items()):
imf_pred = model.forecast(steps=len(test_data))
test_predictions += imf_pred
# Evaluation metrics
mse = mean_squared_error(test_data, test_predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(test_data, test_predictions)
mape = mean_absolute_percentage_error(test_data, test_predictions)
r2 = r2_score(test_data, test_predictions)
print("\nFinal Evaluation Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.4f}")
print(f"R²: {r2:.4f}")
# Plot actual vs predicted
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("EMD-ARIMA: Actual vs Predicted Prices")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/eAR_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Model summary for each IMF
for imf_name, model in best_imf_models.items():
print(f"\n{imf_name} ARIMA Model Summary:")
print(model.summary())
# Plot diagnostics
model.plot_diagnostics(figsize=(12, 8))
plt.suptitle(f"{imf_name} ARIMA Diagnostics", y=1.02)
plt.tight_layout()
plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/eAR_result3.png", dpi=300, bbox_inches='tight')
plt.show()
Training ARIMA for IMF 1
Best params for IMF 1: {'d': 0, 'p': 1, 'q': 5} with MSE: 1750.0254
Training ARIMA for IMF 2
Best params for IMF 2: {'d': 0, 'p': 3, 'q': 0} with MSE: 3165.6733
Training ARIMA for IMF 3
Best params for IMF 3: {'d': 0, 'p': 2, 'q': 3} with MSE: 6727.5757
Training ARIMA for IMF 4
Best params for IMF 4: {'d': 0, 'p': 6, 'q': 2} with MSE: 10721.9262
Training ARIMA for IMF 5
Best params for IMF 5: {'d': 0, 'p': 4, 'q': 6} with MSE: 168133.2143
Training ARIMA for IMF 6
Best params for IMF 6: {'d': 0, 'p': 6, 'q': 5} with MSE: 62.9625
Final Evaluation Metrics:
MSE: 209150.1327
RMSE: 457.3293
MAE: 394.7422
MAPE: 0.2738
R²: -0.0522
IMF_1 ARIMA Model Summary:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 505
Model: ARIMA(1, 0, 5) Log Likelihood -3093.977
Date: Thu, 06 Nov 2025 AIC 6203.954
Time: 22:21:58 BIC 6237.750
Sample: 0 HQIC 6217.210
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 0.8959 15.708 0.057 0.955 -29.891 31.683
ar.L1 0.5761 0.086 6.703 0.000 0.408 0.745
ma.L1 -0.4222 0.088 -4.821 0.000 -0.594 -0.251
ma.L2 0.1689 0.023 7.403 0.000 0.124 0.214
ma.L3 0.0302 0.032 0.957 0.339 -0.032 0.092
ma.L4 0.1166 0.039 2.957 0.003 0.039 0.194
ma.L5 0.1655 0.054 3.072 0.002 0.060 0.271
sigma2 1.226e+04 344.827 35.559 0.000 1.16e+04 1.29e+04
===================================================================================
Ljung-Box (L1) (Q): 0.13 Jarque-Bera (JB): 7125.23
Prob(Q): 0.72 Prob(JB): 0.00
Heteroskedasticity (H): 20.79 Skew: -1.21
Prob(H) (two-sided): 0.00 Kurtosis: 21.24
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
IMF_2 ARIMA Model Summary:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 505
Model: ARIMA(3, 0, 0) Log Likelihood -2733.353
Date: Thu, 06 Nov 2025 AIC 5476.706
Time: 22:22:02 BIC 5497.829
Sample: 0 HQIC 5484.991
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 11.7618 100.250 0.117 0.907 -184.725 208.248
ar.L1 2.0483 0.012 164.464 0.000 2.024 2.073
ar.L2 -1.7123 0.021 -81.475 0.000 -1.753 -1.671
ar.L3 0.6377 0.013 49.607 0.000 0.613 0.663
sigma2 2912.5351 66.936 43.512 0.000 2781.343 3043.728
===================================================================================
Ljung-Box (L1) (Q): 91.23 Jarque-Bera (JB): 29580.25
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 50.33 Skew: 1.43
Prob(H) (two-sided): 0.00 Kurtosis: 40.38
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
IMF_3 ARIMA Model Summary:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 505
Model: ARIMA(2, 0, 3) Log Likelihood -1638.131
Date: Thu, 06 Nov 2025 AIC 3290.262
Time: 22:22:07 BIC 3319.834
Sample: 0 HQIC 3301.862
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 5.0279 14.706 0.342 0.732 -23.795 33.851
ar.L1 1.7983 0.006 310.468 0.000 1.787 1.810
ar.L2 -0.8979 0.006 -149.878 0.000 -0.910 -0.886
ma.L1 1.8303 0.016 113.065 0.000 1.799 1.862
ma.L2 1.4365 0.025 56.927 0.000 1.387 1.486
ma.L3 0.4791 0.015 32.146 0.000 0.450 0.508
sigma2 37.3411 0.893 41.817 0.000 35.591 39.091
===================================================================================
Ljung-Box (L1) (Q): 22.13 Jarque-Bera (JB): 27653.31
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 204.27 Skew: -0.46
Prob(H) (two-sided): 0.00 Kurtosis: 39.24
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
IMF_4 ARIMA Model Summary:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 505
Model: ARIMA(6, 0, 2) Log Likelihood 429.574
Date: Thu, 06 Nov 2025 AIC -839.149
Time: 22:22:10 BIC -796.903
Sample: 0 HQIC -822.578
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 16.3807 43.958 0.373 0.709 -69.776 102.537
ar.L1 3.3543 0.490 6.842 0.000 2.393 4.315
ar.L2 -3.5991 1.770 -2.033 0.042 -7.068 -0.130
ar.L3 0.4667 2.311 0.202 0.840 -4.063 4.996
ar.L4 1.5751 1.227 1.284 0.199 -0.829 3.979
ar.L5 -0.9290 0.324 -2.870 0.004 -1.563 -0.295
ar.L6 0.1318 0.133 0.990 0.322 -0.129 0.393
ma.L1 1.0005 0.503 1.989 0.047 0.015 1.986
ma.L2 0.1576 0.342 0.460 0.645 -0.513 0.828
sigma2 0.0099 0.000 31.044 0.000 0.009 0.011
===================================================================================
Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 46089.69
Prob(Q): 0.81 Prob(JB): 0.00
Heteroskedasticity (H): 5.69 Skew: -1.09
Prob(H) (two-sided): 0.00 Kurtosis: 49.75
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
IMF_5 ARIMA Model Summary:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 505
Model: ARIMA(4, 0, 6) Log Likelihood 2215.512
Date: Thu, 06 Nov 2025 AIC -4407.024
Time: 22:22:15 BIC -4356.329
Sample: 0 HQIC -4387.140
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 33.1840 0.000 1.08e+05 0.000 33.183 33.185
ar.L1 3.9437 0.000 1.06e+04 0.000 3.943 3.944
ar.L2 -5.8380 0.002 -2854.519 0.000 -5.842 -5.834
ar.L3 3.8449 0.003 1294.961 0.000 3.839 3.851
ar.L4 -0.9506 0.001 -732.655 0.000 -0.953 -0.948
ma.L1 0.5451 0.021 25.382 0.000 0.503 0.587
ma.L2 0.0285 0.013 2.196 0.028 0.003 0.054
ma.L3 -0.0949 0.020 -4.721 0.000 -0.134 -0.055
ma.L4 0.1659 0.027 6.226 0.000 0.114 0.218
ma.L5 -0.0416 0.012 -3.497 0.000 -0.065 -0.018
ma.L6 -0.1280 0.028 -4.561 0.000 -0.183 -0.073
sigma2 8.068e-06 2.28e-07 35.362 0.000 7.62e-06 8.51e-06
===================================================================================
Ljung-Box (L1) (Q): 0.09 Jarque-Bera (JB): 10712.40
Prob(Q): 0.76 Prob(JB): 0.00
Heteroskedasticity (H): 0.36 Skew: -0.71
Prob(H) (two-sided): 0.00 Kurtosis: 25.52
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 3.42e+17. Standard errors may be unstable.
IMF_6 ARIMA Model Summary:
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 505
Model: ARIMA(6, 0, 5) Log Likelihood 1410.152
Date: Thu, 06 Nov 2025 AIC -2794.305
Time: 22:22:18 BIC -2739.386
Sample: 0 HQIC -2772.764
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 874.8754 8.475 103.225 0.000 858.264 891.487
ar.L1 -1.5260 0.003 -487.551 0.000 -1.532 -1.520
ar.L2 1.0409 0.008 136.127 0.000 1.026 1.056
ar.L3 3.0890 0.006 490.915 0.000 3.077 3.101
ar.L4 0.8510 0.006 150.759 0.000 0.840 0.862
ar.L5 -1.5335 0.004 -365.778 0.000 -1.542 -1.525
ar.L6 -0.9219 0.003 -305.895 0.000 -0.928 -0.916
ma.L1 4.5909 0.027 170.531 0.000 4.538 4.644
ma.L2 8.7593 0.054 163.319 0.000 8.654 8.864
ma.L3 8.6709 0.045 192.230 0.000 8.583 8.759
ma.L4 4.4523 0.019 236.755 0.000 4.415 4.489
ma.L5 0.9493 0.006 151.144 0.000 0.937 0.962
sigma2 0.0002 1.9e-05 10.710 0.000 0.000 0.000
===================================================================================
Ljung-Box (L1) (Q): 476.13 Jarque-Bera (JB): 126.77
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.45 Skew: -0.76
Prob(H) (two-sided): 0.00 Kurtosis: 4.93
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [ ]:
# Plot actual vs predicted
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("EMD-ARIMA: Actual vs Predicted Prices")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/eAR_result11.png", dpi=300, bbox_inches='tight')
plt.show()
In [5]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from PyEMD import EMD
from itertools import product
from tqdm import tqdm
# Suppress warnings
warnings.filterwarnings("ignore")
# Load data
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)'] / 100
data = df['Modal Price (Rs./kg)'].values
# EMD decomposition
emd = EMD()
emd.extrema_detection = "parabol"
imfs = emd.emd(data, max_imf=5)
# Filter meaningful IMFs
imfs = [imf for imf in imfs if np.var(imf) > 0.05 * np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")
# Combine IMFs into one reconstructed signal
reconstructed_signal = np.sum(imfs, axis=0)
# Data splitting
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
train_data = reconstructed_signal[:train_size]
val_data = reconstructed_signal[train_size:train_size+val_size]
test_data = reconstructed_signal[train_size+val_size:]
# Parameter grid for ARIMA (p, d, q)
param_grid = {
'order': [(1,0,0), (1,1,0), (2,1,0), (2,1,2), (3,0,2), (5,1,0)]
}
best_score = np.inf
best_model = None
best_params = None
# Train ARIMA on combined signal
for order in tqdm(param_grid['order']):
try:
model = ARIMA(train_data, order=order)
model_fit = model.fit()
val_pred = model_fit.forecast(steps=val_size)
mse = mean_squared_error(val_data, val_pred)
if mse < best_score:
best_score = mse
best_model = model_fit
best_params = order
except:
continue
print(f"\nBest ARIMA Params: {best_params} | Validation MSE: {best_score:.4f}")
# Forecast
test_predictions = best_model.forecast(steps=test_size)
# Evaluation
metrics = {
'MSE': mean_squared_error(test_data, test_predictions),
'RMSE': np.sqrt(mean_squared_error(test_data, test_predictions)),
'MAE': mean_absolute_error(test_data, test_predictions),
'MAPE': mean_absolute_percentage_error(test_data, test_predictions),
'R²': r2_score(test_data, test_predictions)
}
print("\nFinal Evaluation Metrics:")
for k, v in metrics.items():
print(f"{k}: {v:.4f}")
# Plot
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("EMD–ARIMA (Combined IMFs): Actual vs Predicted")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eAR_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Model summary
print(best_model.summary())
Selected 6 meaningful IMFs
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00, 3.32it/s]
Best ARIMA Params: (1, 0, 0) | Validation MSE: 23103.7183 Final Evaluation Metrics: MSE: 592684.4402 RMSE: 769.8600 MAE: 684.7383 MAPE: 0.4179 R²: -1.9817
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 505
Model: ARIMA(1, 0, 0) Log Likelihood -3322.188
Date: Thu, 06 Nov 2025 AIC 6650.375
Time: 11:23:46 BIC 6663.049
Sample: 0 HQIC 6655.346
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 942.1786 236.343 3.986 0.000 478.955 1405.402
ar.L1 0.9550 0.014 66.751 0.000 0.927 0.983
sigma2 3.02e+04 532.743 56.682 0.000 2.92e+04 3.12e+04
===================================================================================
Ljung-Box (L1) (Q): 11.50 Jarque-Bera (JB): 66137.94
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 20.92 Skew: 2.40
Prob(H) (two-sided): 0.00 Kurtosis: 58.86
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [3]:
# Model summary for each IMF
for imf_name, model in best_imf_models.items():
print(f"\n{imf_name} ARIMA Model Summary:")
print(model.summary())
# Plot diagnostics
model.plot_diagnostics(figsize=(12, 8))
plt.suptitle(f"{imf_name} ARIMA Diagnostics", y=1.02)
plt.tight_layout()
plt.tight_layout()
plt.suptitle("Wavelet Decomposition Components (haar, level=3)", y=1.02)
plt.savefig("C:/Users/marti/Desktop/png/eAR_result3.png", dpi=300, bbox_inches='tight')
plt.show()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[3], line 2 1 # Model summary for each IMF ----> 2 for imf_name, model in best_imf_models.items(): 3 print(f"\n{imf_name} ARIMA Model Summary:") 4 print(model.summary()) NameError: name 'best_imf_models' is not defined
In [36]:
# --- Make sure arrays are flattened ---
test_data = np.asarray(test_data).flatten()
test_predictions = np.asarray(test_predictions).flatten()
# --- Generate DataFrame ---
results_df = pd.DataFrame({
'Date': df.index[train_size + val_size : train_size + val_size + len(test_data)],
'Actual': test_data,
'Predicted': test_predictions
})
# --- Save as TSV ---
results_df.to_csv('emd_sarima_test_vs_predicted.tsv',
sep='\t',
index=False,
float_format='%.4f')
print("\n✅ File saved: 'emd_sarima_test_vs_predicted.tsv'")
✅ File saved: 'emd_sarima_test_vs_predicted.tsv'
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
=== EMD + SARIMA ===¶
=== Import Libraries ===¶
In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PyEMD import EMD
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima
from sklearn.model_selection import TimeSeriesSplit
import warnings
warnings.filterwarnings("ignore")
=== Step 1: Load and preprocess data ===¶
In [16]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [17]:
df[" Date"] = pd.to_datetime(df[" Date"])
df.set_index(" Date", inplace=True)
In [20]:
df["Modal Price (Rs./Kg)"]= df["Modal Price (Rs./Quintal)"]/100
In [22]:
df = df[["Modal Price (Rs./Kg)"]].dropna()
df.rename(columns={"Modal Price (Rs./Kg)": "Price"}, inplace=True)
price_values = df['Price'].values
In [24]:
Date=df.index
=== Plot Original Series ===¶
In [23]:
plt.figure(figsize=(12, 6))
plt.plot(df.index, price_values)
plt.title('Original Cardamom Price Time Series')
plt.xlabel('Date')
plt.ylabel('Price')
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eSAR_result1.png", dpi=300, bbox_inches='tight')
plt.show()
=== Apply EMD ===¶
In [26]:
emd = EMD()
imfs = emd(price_values)
selected_imf_indices = list(range(3,6 ))
selected_imfs = imfs[selected_imf_indices]
=== Visualize IMFs ===¶
In [29]:
plt.figure(figsize=(16, 20))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, price_values, 'r')
plt.title("Original Time Series")
plt.grid()
for i, imf in enumerate(imfs):
plt.subplot(len(imfs)+1, 1, i+2)
plt.plot(df.index, imf, 'g')
plt.title(f"IMF {i+1}")
plt.grid()
plt.tight_layout()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eSAR_result3.png", dpi=300, bbox_inches='tight')
plt.show()
=== Split Data ===¶
In [18]:
total_size = len(df)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
train_val_idx = slice(0, train_size + val_size)
test_idx = slice(train_size + val_size, total_size)
=== Step 10: Plot Full Data Split ===¶
=== SARIMA Hyperparameter Tuning ===¶
In [35]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import numpy as np
Define SARIMA Hyperparameter Options¶
In [38]:
p_values = [0, 1, 2,3,4,5,6]
d_values = [0, 1]
q_values = [0, 1, 2,3,4,5]
P_values = [0, 1,2,3,4,5]
D_values = [0, 1]
Q_values = [0, 1,2,3,4,5]
m = 26 # seasonal period
param_grid = {
'p': p_values,
'd': d_values,
'q': q_values,
'P': P_values,
'D': D_values,
'Q': Q_values
}
Initialize empty lists¶
In [41]:
imf_predictions = []
best_params_summary = []
Select IMFs (Example: IMF 3 to IMF 6)¶
In [44]:
selected_imfs = imfs
from tqdm import tqdm # Add this import at the top with other imports
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit
Start tuning¶
In [47]:
# Reconstruction and evaluation
reconstructed_forecast = np.sum(imf_predictions, axis=0)
actual = df['Price'].values[train_size + val_size:]
forecast_index = df.index[train_size + val_size:]
# Ensure arrays are NumPy and 1D
actual = np.asarray(actual).flatten()
reconstructed_forecast = np.asarray(reconstructed_forecast).flatten()
# Core Metrics
final_rmse = np.sqrt(mean_squared_error(actual, reconstructed_forecast))
final_mape = np.mean(np.abs((actual - reconstructed_forecast) / actual)) * 100
final_mae = mean_absolute_error(actual, reconstructed_forecast)
final_r2 = r2_score(actual, reconstructed_forecast)
# Directional Accuracy
if len(actual) > 1 and len(reconstructed_forecast) > 1:
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(reconstructed_forecast))
final_da = np.mean(actual_diff == forecast_diff) * 100 if len(actual_diff) == len(forecast_diff) else np.nan
else:
final_da = np.nan
# Print Results
print(f"\n🎯 Final Reconstructed Forecast Evaluation:")
print(f"RMSE: {final_rmse:.2f}")
print(f"MAPE: {final_mape:.2f}%")
print(f"MAE: {final_mae:.2f}")
print(f"R²: {final_r2:.4f}")
print(f"Directional Accuracy: {final_da:.2f}%" if not np.isnan(final_da) else "Directional Accuracy: N/A")
# Plot results
plt.figure(figsize=(14, 6))
plt.plot(df.index[:train_size], df['Price'].values[:train_size], label="Train", color='green')
plt.plot(df.index[train_size:train_size + val_size], df['Price'].values[train_size:train_size + val_size], label="Validation", color='orange')
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (EMD-SARIMA)", linestyle='--', color='blue')
plt.title(f"Cardamom Price Forecast (EMD-SARIMA)\nSelected IMFs: 2–6 | RMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[47], line 11 8 reconstructed_forecast = np.asarray(reconstructed_forecast).flatten() 10 # Core Metrics ---> 11 final_rmse = np.sqrt(mean_squared_error(actual, reconstructed_forecast)) 12 final_mape = np.mean(np.abs((actual - reconstructed_forecast) / actual)) * 100 13 final_mae = mean_absolute_error(actual, reconstructed_forecast) File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs) 207 try: 208 with config_context( 209 skip_parameter_validation=( 210 prefer_skip_nested_validation or global_skip_validation 211 ) 212 ): --> 213 return func(*args, **kwargs) 214 except InvalidParameterError as e: 215 # When the function is just a wrapper around an estimator, we allow 216 # the function to delegate validation to the estimator, but we replace 217 # the name of the estimator by the name of the function in the error 218 # message to avoid confusion. 219 msg = re.sub( 220 r"parameter of \w+ must be", 221 f"parameter of {func.__qualname__} must be", 222 str(e), 223 ) File ~\anaconda3\Lib\site-packages\sklearn\metrics\_regression.py:497, in mean_squared_error(y_true, y_pred, sample_weight, multioutput, squared) 492 if not squared: 493 return root_mean_squared_error( 494 y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput 495 ) --> 497 y_type, y_true, y_pred, multioutput = _check_reg_targets( 498 y_true, y_pred, multioutput 499 ) 500 check_consistent_length(y_true, y_pred, sample_weight) 501 output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) File ~\anaconda3\Lib\site-packages\sklearn\metrics\_regression.py:102, in _check_reg_targets(y_true, y_pred, multioutput, dtype) 68 def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"): 69 """Check that y_true and y_pred belong to the same regression task. 70 71 Parameters (...) 100 correct keyword. 101 """ --> 102 check_consistent_length(y_true, y_pred) 103 y_true = check_array(y_true, ensure_2d=False, dtype=dtype) 104 y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype) File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:457, in check_consistent_length(*arrays) 455 uniques = np.unique(lengths) 456 if len(uniques) > 1: --> 457 raise ValueError( 458 "Found input variables with inconsistent numbers of samples: %r" 459 % [int(l) for l in lengths] 460 ) ValueError: Found input variables with inconsistent numbers of samples: [0, 1]
In [ ]:
# Plot results
plt.figure(figsize=(14, 6))
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (EMD-SARIMA)", linestyle='--', color='blue')
plt.title(f"Cardamom Price Forecast (EMD-SARIMA)\nSelected IMFs: 2–6 | RMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
In [ ]:
for imf_index, imf in enumerate(selected_imfs, start=2):
print(f"🔍 Tuning SARIMA for IMF {imf_index}")
best_score = float('inf')
best_params = None
best_forecast = None
tscv = TimeSeriesSplit(n_splits=3)
for params in ParameterGrid(param_grid):
fold_losses = []
for train_idx, val_idx in tscv.split(imf):
train_series = imf[train_idx]
val_series = imf[val_idx]
try:
model = SARIMAX(
train_series,
order=(params['p'], params['d'], params['q']),
seasonal_order=(params['P'], params['D'], params['Q'], m),
enforce_stationarity=False,
enforce_invertibility=False
)
model_fit = model.fit(disp=False)
val_forecast = model_fit.forecast(steps=len(val_series))
score = mean_squared_error(val_series, val_forecast)
fold_losses.append(score)
except:
continue # skip bad models
if fold_losses:
avg_loss = np.mean(fold_losses)
if avg_loss < best_score:
best_score = avg_loss
best_params = params
try:
full_model = SARIMAX(
imf,
order=(params['p'], params['d'], params['q']),
seasonal_order=(params['P'], params['D'], params['Q'], m),
enforce_stationarity=False,
enforce_invertibility=False
)
full_model_fit = full_model.fit(disp=False)
forecast = full_model_fit.forecast(steps=test_size) # Define test_size before
best_forecast = forecast
except:
continue
print(f"✅ Best params for IMF {imf_index}: {best_params} with MSE={best_score:.4f}")
best_params_summary.append({
"IMF": imf_index,
"p": best_params['p'],
"d": best_params['d'],
"q": best_params['q'],
"P": best_params['P'],
"D": best_params['D'],
"Q": best_params['Q'],
"m": m,
"Validation MSE": best_score
})
imf_predictions.append(best_forecast)
🔍 Tuning SARIMA for IMF 2
=== Fit SARIMA on Selected IMFs ===¶
In [ ]:
# Initialize summary list
full_model_summaries = []
# Loop over selected IMFs and corresponding best parameters
for i, (imf, summary) in enumerate(zip(selected_imfs, best_params_summary)):
imf_index = summary["IMF"]
params = summary
# Separate train and test
train_data = imf[:train_size + val_size]
test_data = imf[train_size + val_size:]
# Refit best SARIMA
model = SARIMAX(
train_data,
order=(params['p'], params['d'], params['q']),
seasonal_order=(params['P'], params['D'], params['Q'], m),
enforce_stationarity=False,
enforce_invertibility=False
)
model_fit = model.fit(disp=False)
=== Reconstruct Final Forecast ===¶
In [ ]:
reconstructed_forecast = np.sum(imf_predictions, axis=0)
=== Step 4: Get actual test values ===¶
In [ ]:
actual = df['Price'].values[train_size + val_size:]
forecast_index = df.index[train_size + val_size:]
=== Evaluation ===¶
In [ ]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
# Ensure arrays are NumPy and 1D
actual = np.asarray(actual).flatten()
reconstructed_forecast = np.asarray(reconstructed_forecast).flatten()
# Core Metrics
final_rmse = np.sqrt(mean_squared_error(actual, reconstructed_forecast))
final_mape = np.mean(np.abs((actual - reconstructed_forecast) / actual)) * 100
final_mae = mean_absolute_error(actual, reconstructed_forecast)
final_r2 = r2_score(actual, reconstructed_forecast)
# Directional Accuracy
if len(actual) > 1 and len(reconstructed_forecast) > 1:
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(reconstructed_forecast))
if len(actual_diff) == len(forecast_diff):
final_da = np.mean(actual_diff == forecast_diff) * 100
else:
final_da = np.nan
else:
final_da = np.nan
# Print Results
print(f"\n🎯 Final Reconstructed Forecast Evaluation:")
print(f"RMSE: {final_rmse:.2f}")
print(f"MAPE: {final_mape:.2f}%")
print(f"MAE: {final_mae:.2f}")
print(f"R²: {final_r2:.4f}")
print(f"Directional Accuracy: {final_da:.2f}%" if not np.isnan(final_da) else "Directional Accuracy: N/A (insufficient data)")
=== Plot Forecast vs Actual ===¶
In [ ]:
plt.figure(figsize=(14, 6))
plt.plot(df.index[:train_size], df['Price'].values[:train_size], label="Train", color='green')
plt.plot(df.index[train_size:train_size + val_size], df['Price'].values[train_size:train_size + val_size], label="Validation", color='orange')
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (SARIMA + EMD)", linestyle='--', color='blue')
plt.title(f"Cardamom Price Forecast (SARIMA + EMD)\nRMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./Quintal)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eSAR_result5.png", dpi=300, bbox_inches='tight')
plt.show()
In [ ]:
plt.figure(figsize=(12, 6))
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (SARIMA + EMD)", linestyle='--', color='blue')
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/eSAR_result6.png", dpi=300, bbox_inches='tight')
plt.show()
In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from PyEMD import EMD
from itertools import product
from joblib import Parallel, delayed
from tqdm import tqdm
# Suppress warnings
warnings.filterwarnings("ignore")
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
# EMD Decomposition with reduced components
print("Performing EMD decomposition...")
emd = EMD()
emd.extrema_detection = "parabol"
imfs = emd.emd(data, max_imf=5) # Limit to 5 IMFs
# Filter meaningful IMFs (remove low-variance components)
imfs = [imf for imf in imfs if np.var(imf) > 0.05*np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")
# Plot IMFs
plt.figure(figsize=(16, 4*(len(imfs)+1)))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r')
plt.title("Original Time Series")
plt.grid()
for i, imf in enumerate(imfs):
plt.subplot(len(imfs)+1, 1, i+2)
plt.plot(df.index, imf, 'g')
plt.title(f"IMF {i+1} (Variance: {np.var(imf):.2f})")
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esAR_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# Data splitting with proper variable definitions
total_size = len(data)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
train_data = data[:train_size]
val_data = data[train_size:train_size+val_size]
test_data = data[train_size+val_size:] # This was missing in original code
# Focused SARIMA parameter grid
param_grid = {
'order': [(2,1,0),(5,1,0),(3,0,2),(2,0,3)],
'seasonal_order': [(0,1,1,26), (1,1,1,26), (0,1,0,26),(2,0,1,26),(2,0,2,26),(2,0,0,26)]
}
best_imf_models = {}
def train_sarima(imf, order, seasonal_order, train_size, val_size):
try:
model = SARIMAX(imf[:train_size],
order=order,
seasonal_order=seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False)
model_fit = model.fit(disp=False)
val_pred = model_fit.forecast(steps=val_size)
return mean_squared_error(imf[train_size:train_size+val_size], val_pred), model_fit
except:
return np.inf, None
# Parallel training for each IMF
for i, imf in enumerate(imfs, start=1):
print(f"\nTraining SARIMA for IMF {i} (Variance: {np.var(imf):.2f})")
results = Parallel(n_jobs=-1)(
delayed(train_sarima)(imf, order, seasonal_order, train_size, val_size)
for order, seasonal_order in tqdm(product(param_grid['order'], param_grid['seasonal_order']),
total=len(param_grid['order'])*len(param_grid['seasonal_order']))
)
scores, models = zip(*results)
best_idx = np.argmin(scores)
best_imf_models[f'IMF_{i}'] = models[best_idx]
if models[best_idx]:
print(f"Best params: {models[best_idx].model.order}x{models[best_idx].model.seasonal_order} | MSE: {scores[best_idx]:.4f}")
# Reconstruct and evaluate
test_predictions = np.zeros(test_size)
for imf_name, model in best_imf_models.items():
if model: # Only use valid models
test_predictions += model.forecast(steps=test_size)
# Evaluation metrics
metrics = {
'MSE': mean_squared_error(test_data, test_predictions),
'RMSE': np.sqrt(mean_squared_error(test_data, test_predictions)),
'MAE': mean_absolute_error(test_data, test_predictions),
'MAPE': mean_absolute_percentage_error(test_data, test_predictions),
'R²': r2_score(test_data, test_predictions)
}
print("\nFinal Evaluation Metrics:")
for k, v in metrics.items():
print(f"{k}: {v:.4f}")
# Extract the actual test values and the predictions
y_actual = test_data
y_pred = test_predictions
# Ensure they are the same length (a good practice)
min_length = min(len(y_actual), len(y_pred))
y_actual = y_actual[:min_length]
y_pred = y_pred[:min_length]
# Standard Regression Metrics
mse = mean_squared_error(y_actual, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_actual, y_pred)
mape = mean_absolute_percentage_error(y_actual, y_pred)
r2 = r2_score(y_actual, y_pred)
# --- Calculate Directional Accuracy (DA) ---
# Calculate the actual change and the predicted change between consecutive points
actual_changes = np.diff(y_actual) # e.g., [y1-y0, y2-y1, ...]
predicted_changes = np.diff(y_pred) # e.g., [y_hat1-y_hat0, y_hat2-y_hat1, ...]
# Check if the direction (sign) of the change is the same
correct_direction = np.sign(actual_changes) == np.sign(predicted_changes)
# Calculate the percentage of correct directions
da = (np.sum(correct_direction) / len(actual_changes)) * 100
# Plot results
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("Optimized EMD-SARIMA: Actual vs Predicted")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esAR_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Print all metrics, including DA
print("\nFinal Evaluation Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.4f}%")
print(f"R²: {r2:.4f}")
print(f"Directional Accuracy (DA): {da:.2f}%")
# Plot results
plt.figure(figsize=(12, 6))
plt.plot(df.index[train_size+val_size:], test_data, label='Actual')
plt.plot(df.index[train_size+val_size:], test_predictions, label='Predicted', alpha=0.7)
plt.title("Optimized EMD-SARIMA: Actual vs Predicted")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esAR_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Model diagnostics
for imf_name, model in best_imf_models.items():
if model:
print(f"\n{imf_name} Model Summary:")
print(model.summary())
model.plot_diagnostics(figsize=(12, 8))
plt.suptitle(f"{imf_name} Diagnostics", y=1.02)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/esAR_result4.png", dpi=300, bbox_inches='tight')
plt.show()
Performing EMD decomposition... Selected 6 meaningful IMFs
Training SARIMA for IMF 1 (Variance: 55233660341437862295686600458240.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:09<00:00, 2.42it/s]
Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 363553755381698150724496523264.0000 Training SARIMA for IMF 2 (Variance: 49912052842838056400575845629952.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:01<00:00, 14.21it/s]
Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 59775053773018410379879907328.0000 Training SARIMA for IMF 3 (Variance: 449418934480625974407117930496.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:01<00:00, 12.01it/s]
Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 69933874802371244797865230336.0000 Training SARIMA for IMF 4 (Variance: 434742580397082004974810955776.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:02<00:00, 8.81it/s]
Best params: (5, 1, 0)x(2, 0, 0, 26) | MSE: 14234279385445683335121600512.0000 Training SARIMA for IMF 5 (Variance: 74837305360678816574282399744.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:02<00:00, 10.68it/s]
Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 39097127682665127677526016.0000 Training SARIMA for IMF 6 (Variance: 1443827991893432784687988736.00)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:04<00:00, 5.30it/s]
Best params: (2, 1, 0)x(0, 1, 0, 26) | MSE: 168209.9778 Final Evaluation Metrics: MSE: 110808286918274838905162301440.0000 RMSE: 332878787125696.6250 MAE: 283766135451053.4375 MAPE: 191852526819.0257 R²: -557451738620487155056640.0000
Final Evaluation Metrics: MSE: 110808286918274838905162301440.0000 RMSE: 332878787125696.6250 MAE: 283766135451053.4375 MAPE: 191852526819.0257% R²: -557451738620487155056640.0000 Directional Accuracy (DA): 21.30%
IMF_1 Model Summary:
SARIMAX Results
===========================================================================================
Dep. Variable: y No. Observations: 505
Model: SARIMAX(3, 0, 2)x(2, 0, [], 26) Log Likelihood -15480.155
Date: Thu, 06 Nov 2025 AIC 30976.310
Time: 11:47:57 BIC 31009.184
Sample: 0 HQIC 30989.267
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 2.6141 0.036 73.199 0.000 2.544 2.684
ar.L2 -2.3666 0.070 -33.895 0.000 -2.503 -2.230
ar.L3 0.7460 0.036 20.552 0.000 0.675 0.817
ma.L1 0.8043 0.048 16.922 0.000 0.711 0.897
ma.L2 -0.0875 0.051 -1.717 0.086 -0.187 0.012
ar.S.L26 -0.0085 0.081 -0.104 0.917 -0.168 0.151
ar.S.L52 -0.0035 0.191 -0.018 0.985 -0.377 0.370
sigma2 4.565e+28 nan nan nan nan nan
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 401456.37
Prob(Q): 1.00 Prob(JB): 0.00
Heteroskedasticity (H): 586.01 Skew: -7.62
Prob(H) (two-sided): 0.00 Kurtosis: 148.53
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 3.31e+71. Standard errors may be unstable.
IMF_2 Model Summary:
SARIMAX Results
===========================================================================================
Dep. Variable: y No. Observations: 505
Model: SARIMAX(3, 0, 2)x(2, 0, [], 26) Log Likelihood -15478.854
Date: Thu, 06 Nov 2025 AIC 30973.708
Time: 11:48:00 BIC 31006.582
Sample: 0 HQIC 30986.665
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 2.6083 0.036 72.740 0.000 2.538 2.679
ar.L2 -2.3574 0.070 -33.581 0.000 -2.495 -2.220
ar.L3 0.7418 0.037 20.260 0.000 0.670 0.814
ma.L1 0.8041 0.047 17.091 0.000 0.712 0.896
ma.L2 -0.0877 0.050 -1.738 0.082 -0.187 0.011
ar.S.L26 -0.0084 0.083 -0.101 0.919 -0.171 0.154
ar.S.L52 -0.0042 0.180 -0.023 0.982 -0.357 0.349
sigma2 4.537e+28 nan nan nan nan nan
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 397867.33
Prob(Q): 0.99 Prob(JB): 0.00
Heteroskedasticity (H): 584.81 Skew: 7.57
Prob(H) (two-sided): 0.00 Kurtosis: 147.88
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2.36e+72. Standard errors may be unstable.
IMF_3 Model Summary:
SARIMAX Results
===========================================================================================
Dep. Variable: y No. Observations: 505
Model: SARIMAX(3, 0, 2)x(2, 0, [], 26) Log Likelihood -12658.025
Date: Thu, 06 Nov 2025 AIC 25332.050
Time: 11:48:03 BIC 25364.924
Sample: 0 HQIC 25345.007
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 2.9649 0.025 116.462 0.000 2.915 3.015
ar.L2 -2.9416 0.050 -58.336 0.000 -3.040 -2.843
ar.L3 0.9766 0.025 38.567 0.000 0.927 1.026
ma.L1 1.2987 0.056 23.333 0.000 1.190 1.408
ma.L2 0.7186 0.059 12.282 0.000 0.604 0.833
ar.S.L26 0.0307 0.116 0.264 0.792 -0.197 0.258
ar.S.L52 -0.0004 0.007 -0.053 0.958 -0.014 0.013
sigma2 3.184e+23 2.13e-25 1.5e+48 0.000 3.18e+23 3.18e+23
===================================================================================
Ljung-Box (L1) (Q): 51.02 Jarque-Bera (JB): 1287.00
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 11.25 Skew: -0.25
Prob(H) (two-sided): 0.00 Kurtosis: 11.27
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 1.13e+63. Standard errors may be unstable.
IMF_4 Model Summary:
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 505
Model: SARIMAX(5, 1, 0)x(2, 0, 0, 26) Log Likelihood -10564.602
Date: Thu, 06 Nov 2025 AIC 21145.204
Time: 11:48:06 BIC 21178.024
Sample: 0 HQIC 21158.143
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 3.5023 0.019 180.791 0.000 3.464 3.540
ar.L2 -4.8957 0.066 -73.718 0.000 -5.026 -4.766
ar.L3 3.6338 0.096 38.037 0.000 3.447 3.821
ar.L4 -1.5914 0.071 -22.401 0.000 -1.731 -1.452
ar.L5 0.3509 0.022 16.053 0.000 0.308 0.394
ar.S.L26 0.0490 0.067 0.730 0.465 -0.083 0.181
ar.S.L52 0.0300 0.049 0.610 0.542 -0.066 0.126
sigma2 1.643e+19 3.19e-21 5.16e+39 0.000 1.64e+19 1.64e+19
===================================================================================
Ljung-Box (L1) (Q): 24.06 Jarque-Bera (JB): 7184.28
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.50 Skew: -0.43
Prob(H) (two-sided): 0.00 Kurtosis: 22.62
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 9.57e+54. Standard errors may be unstable.
IMF_5 Model Summary:
SARIMAX Results
===========================================================================================
Dep. Variable: y No. Observations: 505
Model: SARIMAX(3, 0, 2)x(2, 0, [], 26) Log Likelihood -10887.006
Date: Thu, 06 Nov 2025 AIC 21790.011
Time: 11:48:09 BIC 21822.885
Sample: 0 HQIC 21802.968
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 2.9424 0.001 3126.970 0.000 2.941 2.944
ar.L2 -2.8849 0.002 -1535.284 0.000 -2.889 -2.881
ar.L3 0.9425 0.001 1004.395 0.000 0.941 0.944
ma.L1 -7.204e-05 1.2e-06 -60.172 0.000 -7.44e-05 -6.97e-05
ma.L2 -2.191e-05 3.84e-07 -57.043 0.000 -2.27e-05 -2.12e-05
ar.S.L26 1.7915 0.010 175.240 0.000 1.771 1.812
ar.S.L52 -0.6243 0.010 -61.792 0.000 -0.644 -0.605
sigma2 1.056e+17 2.4e-19 4.4e+35 0.000 1.06e+17 1.06e+17
===================================================================================
Ljung-Box (L1) (Q): 391.05 Jarque-Bera (JB): 52.78
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.75 Skew: -0.05
Prob(H) (two-sided): 0.09 Kurtosis: 4.68
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 1.64e+49. Standard errors may be unstable.
IMF_6 Model Summary:
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 505
Model: SARIMAX(2, 1, 0)x(0, 1, 0, 26) Log Likelihood 166.098
Date: Thu, 06 Nov 2025 AIC -326.197
Time: 11:48:12 BIC -313.701
Sample: 0 HQIC -321.283
- 505
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 2.0000 8.25e-13 2.42e+12 0.000 2.000 2.000
ar.L2 -1.0000 9.72e-13 -1.03e+12 0.000 -1.000 -1.000
sigma2 0.0288 3.53e-23 8.17e+20 0.000 0.029 0.029
===================================================================================
Ljung-Box (L1) (Q): 230.74 Jarque-Bera (JB): 0.35
Prob(Q): 0.00 Prob(JB): 0.84
Heteroskedasticity (H): 0.52 Skew: -0.03
Prob(H) (two-sided): 0.00 Kurtosis: 3.12
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 8.02e+36. Standard errors may be unstable.
=== EMD + LSTM===¶
In [98]:
pip install EMD-signal
Requirement already satisfied: EMD-signal in c:\users\marti\anaconda3\lib\site-packages (1.6.4) Requirement already satisfied: numpy>=1.12 in c:\users\marti\anaconda3\lib\site-packages (from EMD-signal) (1.26.4) Requirement already satisfied: scipy>=0.19 in c:\users\marti\anaconda3\lib\site-packages (from EMD-signal) (1.13.1) Requirement already satisfied: pathos>=0.2.1 in c:\users\marti\anaconda3\lib\site-packages (from EMD-signal) (0.3.3) Requirement already satisfied: tqdm<5.0,>=4.64.0 in c:\users\marti\anaconda3\lib\site-packages (from EMD-signal) (4.66.4) Requirement already satisfied: ppft>=1.7.6.9 in c:\users\marti\anaconda3\lib\site-packages (from pathos>=0.2.1->EMD-signal) (1.7.6.9) Requirement already satisfied: dill>=0.3.9 in c:\users\marti\anaconda3\lib\site-packages (from pathos>=0.2.1->EMD-signal) (0.3.9) Requirement already satisfied: pox>=0.3.5 in c:\users\marti\anaconda3\lib\site-packages (from pathos>=0.2.1->EMD-signal) (0.3.5) Requirement already satisfied: multiprocess>=0.70.17 in c:\users\marti\anaconda3\lib\site-packages (from pathos>=0.2.1->EMD-signal) (0.70.17) Requirement already satisfied: colorama in c:\users\marti\appdata\roaming\python\python312\site-packages (from tqdm<5.0,>=4.64.0->EMD-signal) (0.4.6) Note: you may need to restart the kernel to use updated packages.
=== Loading Libraries ===¶
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PyEMD import EMD
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
In [2]:
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\nedumkandam.xlsx",parse_dates=True)
In [3]:
df.head()
Out[3]:
| State Name | District Name | Market Name | Variety | Group | Arrivals (Tonnes) | Min Price (Rs./Quintal) | Max Price (Rs./Quintal) | Modal Price (Rs./Quintal) | Date | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Kerala | Idukki | Nedumkandam | Other | Spices | 14.0 | 120000 | 150000 | 130000 | 2011-01-16 |
| 1 | Kerala | Idukki | Nedumkandam | Other | Spices | 17.0 | 120000 | 150000 | 140000 | 2011-01-23 |
| 2 | Kerala | Idukki | Nedumkandam | Other | Spices | 12.0 | 120000 | 150000 | 130000 | 2011-01-30 |
| 3 | Kerala | Idukki | Nedumkandam | Other | Spices | 8.5 | 120000 | 150000 | 125000 | 2011-02-06 |
| 4 | Kerala | Idukki | Nedumkandam | Other | Spices | 9.2 | 100000 | 115000 | 107500 | 2011-02-13 |
In [4]:
df[" Date"] = pd.to_datetime(df[" Date"])
df.set_index(" Date", inplace=True)
In [5]:
df["Modal Price (Rs./kg)"]=df["Modal Price (Rs./Quintal)"]/100
In [6]:
price_values = df["Modal Price (Rs./kg)"].values
=== Step 1.5: Normalize Original Price Series ===¶
In [8]:
# Extract price values and normalize them
price_values = df["Modal Price (Rs./kg)"].values
scaler_total = MinMaxScaler()
scaled_data = scaler_total.fit_transform(price_values.reshape(-1, 1)).flatten()
=== Step 2: Apply EMD on Normalized Data ===¶
In [10]:
emd = EMD()
imfs = emd(scaled_data)
=== Step 3: Visualize IMFs ===¶
In [12]:
plt.figure(figsize=(16, 20))
plt.subplot(len(imfs) + 1, 1, 1)
plt.plot(df.index, scaled_data, 'r')
plt.title("Normalized Time Series")
plt.grid()
for i, imf in enumerate(imfs):
plt.subplot(len(imfs) + 1, 1, i + 2)
plt.plot(df.index, imf, 'g')
plt.title(f"IMF {i + 1}")
plt.grid()
plt.tight_layout()
plt.show()
Step 3: Select Relevant IMFs (e.g., first 3 IMFs)¶
In [14]:
selected_imfs = imfs
=== Step 4: Train/Val/Test Split ===¶
In [16]:
total_size = len(df)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
test_idx = slice(train_size + val_size, total_size)
=== Step 5: LSTM Helper Function ===¶
In [18]:
# Function to create sequences
def create_sequences(data, look_back):
X, y = [], []
for i in range(len(data) - look_back):
X.append(data[i:i + look_back])
y.append(data[i + look_back])
return np.array(X), np.array(y)
=== Step 6: Train LSTM on each IMF with TSCV and Hyperparameter Tuning ===¶
In [20]:
from sklearn.model_selection import TimeSeriesSplit, ParameterGrid
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
1. Hyperparameter grid¶
In [22]:
# Sample parameter grids
look_back_options = [3, 5]
lstm_units_options = [32, 50]
epochs_options = [30, 50]
batch_size = 16
2. Initialize result storage¶
In [26]:
# Placeholder for results
best_params_summary = []
imf_predictions = []
3. Select IMF 3 to 6 (Python index 2 to 5)¶
In [34]:
selected_imfs = imfs
Start tuning¶
In [ ]:
for imf_index, imf in enumerate(selected_imfs, start=1):
print(f"🔍 Tuning LSTM for IMF {imf_index}")
best_score = float('inf')
best_params = None
best_forecast = None
param_grid = {
'look_back': look_back_options,
'lstm_units': lstm_units_options,
'epochs': epochs_options
}
tscv = TimeSeriesSplit(n_splits=3)
for params in ParameterGrid(param_grid):
fold_losses = []
for train_idx, val_idx in tscv.split(imf):
scaler = MinMaxScaler()
scaled_imf = scaler.fit_transform(imf.reshape(-1, 1)).flatten()
train_series = scaled_imf[train_idx]
val_series = scaled_imf[val_idx[0]-params['look_back']:val_idx[-1]+1]
X_train, y_train = create_sequences(train_series, params['look_back'])
X_val, y_val = create_sequences(val_series, params['look_back'])
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
model = Sequential()
model.add(LSTM(params['lstm_units'], activation='relu', input_shape=(params['look_back'], 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(X_train, y_train, epochs=params['epochs'], batch_size=batch_size, verbose=0, validation_data=(X_val, y_val), callbacks=[es])
val_pred_scaled = model.predict(X_val)
val_pred = scaler.inverse_transform(val_pred_scaled).flatten()
actual_val = scaler.inverse_transform(y_val.reshape(-1, 1)).flatten()
score = mean_squared_error(actual_val, val_pred)
fold_losses.append(score)
avg_loss = np.mean(fold_losses)
if avg_loss < best_score:
best_score = avg_loss
best_params = params
scaler = MinMaxScaler()
scaled_imf = scaler.fit_transform(imf.reshape(-1, 1)).flatten()
train_series = scaled_imf[:train_size + val_size]
X_train, y_train = create_sequences(train_series, params['look_back'])
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
test_series = scaled_imf[train_size + val_size - params['look_back']:]
X_test, _ = create_sequences(test_series, params['look_back'])
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
model = Sequential()
model.add(LSTM(params['lstm_units'], activation='relu', input_shape=(params['look_back'], 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=params['epochs'], batch_size=batch_size, verbose=0)
forecast_scaled = model.predict(X_test)
best_forecast = scaler.inverse_transform(forecast_scaled).flatten()
print(f"✅ Best params for IMF {imf_index}: {best_params} with MSE={best_score:.4f}")
best_params_summary.append({
"IMF": imf_index,
"Look_back": best_params['look_back'],
"Units": best_params['lstm_units'],
"Epochs": best_params['epochs'],
"Validation MSE": best_score
})
imf_predictions.append(best_forecast)
🔍 Tuning LSTM for IMF 1
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
WARNING:tensorflow:5 out of the last 11 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x0000021621CAB2E0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 1/5 ━━━━━━━━━━━━━━━━━━━━ 0s 144ms/stepWARNING:tensorflow:5 out of the last 11 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x0000021621CAB2E0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 75ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 70ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 39ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 66ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 41ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 39ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 41ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 71ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 73ms/step ✅ Best params for IMF 1: {'epochs': 50, 'look_back': 5, 'lstm_units': 50} with MSE=0.0029 🔍 Tuning LSTM for IMF 2
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 32ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 71ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step
C:\Users\marti\anaconda3\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
In [ ]:
# Print final model summary
print("\n🏆 Final Model Summary 🏆")
print("="*50)
print(f"{'IMF':<8}{'Look Back':<12}{'LSTM Units':<12}{'Epochs':<10}{'Validation MSE':<15}")
print("-"*50)
for summary in best_params_summary:
print(f"{summary['IMF']:<8}{summary['Look_back']:<12}{summary['Units']:<12}{summary['Epochs']:<10}{summary['Validation MSE']:.4f}")
print("="*50)
# Calculate and print overall performance
total_mse = sum(item['Validation MSE'] for item in best_params_summary)
avg_mse = total_mse / len(best_params_summary)
print(f"\n📊 Overall Performance:")
print(f" - Total Validation MSE across all IMFs: {total_mse:.4f}")
print(f" - Average Validation MSE per IMF: {avg_mse:.4f}")
# Print final recommendations
print("\n💡 Recommendations:")
print(" - Best performing IMF components (lowest MSE):")
sorted_imfs = sorted(best_params_summary, key=lambda x: x['Validation MSE'])
for i, imf in enumerate(sorted_imfs[:3], 1):
print(f" {i}. IMF {imf['IMF']} (MSE: {imf['Validation MSE']:.4f})")
print(" - Consider focusing on these components for further optimization")
print(" - Higher MSE components may need different architecture or preprocessing")
=== Step 7: Reconstruct Final Forecast from IMFs ===¶
In [ ]:
reconstructed_scaled_forecast = np.sum(imf_predictions, axis=0)
reconstructed_forecast = scaler_total.inverse_transform(reconstructed_scaled_forecast.reshape(-1, 1)).flatten()
actual = df['Modal Price (Rs./kg)'].values[test_idx][:len(reconstructed_forecast)]
forecast_index = df.index[test_idx][:len(reconstructed_forecast)]
In [ ]:
reconstructed_forecast
In [ ]:
actual
=== Step 8: Evaluation ===¶
In [ ]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pandas as pd
# Define datetime index
da = forecast_index
# Ensure arrays
actual = np.array(actual)
forecast = np.array(reconstructed_forecast)
# Evaluation metrics
epsilon = 1e-10
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
mape = np.mean(np.abs((actual - forecast) / (actual + epsilon))) * 100
smape = 100 * np.mean(2 * np.abs(actual - forecast) / (np.abs(actual) + np.abs(forecast) + epsilon))
r2 = r2_score(actual, forecast)
# Directional Accuracy
actual_diff = np.diff(actual)
forecast_diff = np.diff(forecast)
direction_matches = np.sign(actual_diff) == np.sign(forecast_diff)
directional_accuracy = np.mean(direction_matches) * 100
# Create results DataFrame with datetime index
df_results = pd.DataFrame({
'Date': da,
'Actual': actual,
'Forecast': forecast
})
df_results.set_index('Date', inplace=True)
# Display sample
print(df_results.head())
# Print metrics
print("\n" + "="*50)
print("MODEL PERFORMANCE METRICS".center(50))
print("="*50)
print(f"{'Samples Evaluated':<35}: {len(actual)}")
print(f"{'Mean Squared Error (MSE)':<35}: {mse:.2f}")
print(f"{'Root Mean Squared Error (RMSE)':<35}: {rmse:.2f}")
print(f"{'Mean Absolute Error (MAE)':<35}: {mae:.2f}")
print(f"{'Mean Absolute Percentage Error (MAPE)':<35}: {mape:.2f}%")
print(f"{'Symmetric MAPE (sMAPE)':<35}: {smape:.2f}%")
print(f"{'R-squared (R²)':<35}: {r2:.4f}")
print(f"{'Directional Accuracy (DA)':<35}: {directional_accuracy:.2f}%")
print("="*50)
=== Step 9: Plot Forecast vs Actual ===¶
In [ ]:
plt.figure(figsize=(14, 6))
plt.plot(df.index[:train_size], df['Price'].values[:train_size], label="Train", color='green')
plt.plot(df.index[train_size:train_size + val_size], df['Price'].values[train_size:train_size + val_size], label="Validation", color='orange')
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (LSTM + EMD)", linestyle='--', color='blue')
plt.title(f"Cardamom Price Forecast (SARIMA + EMD)\nRMSE: {final_rmse:.2f} | MAPE: {final_mape:.1f}%", pad=20)
plt.xlabel("Date")
plt.ylabel("Price (Rs./Quintal)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
In [38]:
plt.figure(figsize=(14, 6))
plt.plot(forecast_index, actual, label="Test (Actual)", color='red')
plt.plot(forecast_index, reconstructed_forecast, label="Forecast (LSTM + EMD)", linestyle='--', color='blue')
Out[38]:
[<matplotlib.lines.Line2D at 0x240c31cfc50>]
In [26]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PyEMD import EMD
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout # Changed GRU to LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf
# Suppress warnings
warnings.filterwarnings("ignore")
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
print(f"Original data length: {len(data)}")
# --- Step 1: EMD Decomposition ---
print("Performing EMD decomposition...")
emd = EMD()
imfs = emd.emd(data, max_imf=5) # Extract up to 5 IMFs
# Plot IMFs
plt.figure(figsize=(16, 4*(len(imfs)+1)))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r', linewidth=2)
plt.title("Original Time Series")
plt.grid()
for i, imf in enumerate(imfs):
plt.subplot(len(imfs)+1, 1, i+2)
plt.plot(df.index, imf, 'g')
plt.title(f"IMF {i+1} (Variance: {np.var(imf):.2f})")
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Filter meaningful IMFs (remove low-variance components)
imfs = [imf for imf in imfs if np.var(imf) > 0.05*np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")
# Reconstruct signal from selected IMFs
reconstructed_data = np.sum(imfs, axis=0)
# Plot original vs reconstructed data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, reconstructed_data, label='EMD Reconstructed', linewidth=2)
plt.title("Original vs EMD-Reconstructed Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 2: Data Preparation for LSTM ---
# Use reconstructed data for training
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(reconstructed_data.reshape(-1, 1))
# Create sequences for LSTM
def create_sequences(data, lookback=52):
X, y = [], []
for i in range(lookback, len(data)):
X.append(data[i-lookback:i])
y.append(data[i])
return np.array(X), np.array(y)
lookback = 52 # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)
# Reshape for LSTM [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")
# --- Step 3: Hyperparameter Tuning for LSTM ---
def build_model(hp):
model = Sequential()
# Number of LSTM layers (changed from GRU to LSTM)
for i in range(hp.Int('num_layers', 1, 3)):
model.add(LSTM( # Changed GRU to LSTM
units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
))
model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
# Dense layers with ReLU activation
for i in range(hp.Int('dense_layers', 0, 2)):
model.add(Dense(
units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
activation='relu'
))
model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
model.add(Dense(1, activation='linear'))
model.compile(
optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
loss='mse',
metrics=['mae']
)
return model
print("\nStarting LSTM hyperparameter tuning...")
tuner = RandomSearch(
build_model,
objective='val_loss',
max_trials=15,
executions_per_trial=2,
directory='emd_lstm_tuning', # Changed directory name
project_name='cardamom_emd_lstm' # Changed project name
)
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
tuner.search(
X_train, y_train,
epochs=100,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of LSTM layers: {best_hp.get('num_layers')}") # Changed from GRU to LSTM
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
print(f"LSTM layer {i+1} units: {best_hp.get(f'units_{i}')}") # Changed from GRU to LSTM
print(f"LSTM layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}") # Changed from GRU to LSTM
# --- Step 4: Build and Train Final LSTM Model ---
final_model = tuner.hypermodel.build(best_hp)
print("\nTraining final EMD-LSTM model...") # Changed to LSTM
history = final_model.fit(
X_train, y_train,
epochs=200,
batch_size=32,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# --- Step 5: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()
# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
# Get actual values (original scale)
y_actual_original = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
y_actual_reconstructed = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
# --- Step 6: Evaluation ---
def evaluate_forecast(actual, forecast):
"""Comprehensive forecast evaluation"""
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
mape = mean_absolute_percentage_error(actual, forecast)
r2 = r2_score(actual, forecast)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(forecast))
da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
return {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'MAPE': mape,
'R²': r2,
'Directional Accuracy': da
}
# Evaluate on reconstructed data
metrics_reconstructed = evaluate_forecast(y_actual_reconstructed, y_pred)
# Evaluate on original data
metrics_original = evaluate_forecast(y_actual_original, y_pred)
print("\n" + "="*60)
print("EMD-LSTM MODEL TRAINING SUMMARY") # Changed to LSTM
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print(f"Number of IMFs used: {len(imfs)}")
print("\nLSTM Model Architecture:") # Changed to LSTM
final_model.summary()
print("\n" + "="*60)
print("EVALUATION ON RECONSTRUCTED DATA")
print("="*60)
for metric, value in metrics_reconstructed.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
print("\n" + "="*60)
print("EVALUATION ON ORIGINAL DATA")
print("="*60)
for metric, value in metrics_original.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
# --- Step 7: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
plt.figure(figsize=(18, 12))
# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('EMD-LSTM Training History') # Changed to LSTM
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original vs Reconstructed vs Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', alpha=0.7, color='blue')
plt.plot(df.index, reconstructed_data, label='EMD Reconstructed', color='green', linewidth=2)
plt.plot(test_dates, y_pred, label='LSTM Forecast', color='red', linestyle='--', linewidth=2) # Changed to LSTM
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original vs EMD-Reconstructed vs LSTM Forecast') # Changed to LSTM
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (LSTM)', color='red', linestyle='--', linewidth=2) # Changed to LSTM
plt.fill_between(test_dates,
y_pred - metrics_original['RMSE'],
y_pred + metrics_original['RMSE'],
alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - EMD-LSTM Model (Test Period)') # Changed to LSTM
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result5.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 8: Individual IMF Analysis ---
plt.figure(figsize=(16, 4*len(imfs)))
for i, imf in enumerate(imfs):
plt.subplot(len(imfs), 1, i+1)
plt.plot(df.index, imf, color=['blue', 'green', 'red', 'purple', 'orange'][i % 5])
plt.title(f'IMF {i+1} (Variance: {np.var(imf):.4f})')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result6.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 9: Residual Analysis ---
residuals = y_actual_original - y_pred
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('EMD-LSTM Residuals Over Time') # Changed to LSTM
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result7.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('EMD-LSTM Residual Distribution') # Changed to LSTM
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result8.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result9.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual_original, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result11.png", dpi=300, bbox_inches='tight')
plt.show()
print("\nEMD-LSTM Residual Analysis:") # Changed to LSTM
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")
# --- Step 10: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
"""Forecast future values"""
forecasts = []
current_sequence = last_sequence.copy()
for _ in range(steps):
prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
forecasts.append(prediction)
# Update sequence
new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
current_sequence = new_sequence
# Inverse transform
forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
return future_dates, forecasts
# Forecast next 12 weeks
try:
last_sequence = scaled_data[-lookback:]
future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)
print("\n" + "="*50)
print("FUTURE FORECAST - EMD-LSTM MODEL (NEXT 12 WEEKS)") # Changed to LSTM
print("="*50)
for date, price in zip(future_dates, future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
# Plot future forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
plt.plot(future_dates, future_prices, label='EMD-LSTM Future Forecast', color='red', linestyle='--', linewidth=2) # Changed to LSTM
plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title('EMD-LSTM Future Price Forecast (Next 12 Weeks)') # Changed to LSTM
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result21.png", dpi=300, bbox_inches='tight')
plt.show()
except Exception as e:
print(f"Future forecasting failed: {e}")
# --- EMD-LSTM Benefits Summary ---
print("\n" + "="*60)
print("EMD-LSTM MODEL ADVANTAGES")
print("="*60)
print("1. Adaptive Decomposition: EMD adapts to data characteristics")
print("2. Multi-scale Analysis: Captures patterns at different time scales")
print("3. Long-term Memory: LSTM handles long-term dependencies effectively")
print("4. Noise Reduction: Removes high-frequency noise effectively")
print("5. Interpretability: IMFs provide insight into data components")
print("6. Non-linear Handling: Effective for non-stationary, non-linear data")
print("7. Robust Forecasting: Combines EMD's decomposition with LSTM's sequence learning")
print("8. Gate Mechanism: LSTM's gates control information flow for better learning")
print("9. Vanishing Gradient Solution: LSTM handles long sequences better than simple RNNs")
print("10. Complex Pattern Capture: Better for capturing complex temporal patterns")
Original data length: 722 Performing EMD decomposition...
Selected 5 meaningful IMFs
Training sequences: (468, 52, 1) Validation sequences: (100, 52, 1) Test sequences: (102, 52, 1) Starting LSTM hyperparameter tuning... Reloading Tuner from emd_lstm_tuning\cardamom_emd_lstm\tuner0.json Best Hyperparameters: Number of LSTM layers: 1 Learning rate: 0.0009511933717016039 LSTM layer 1 units: 32 LSTM layer 1 dropout: 0.1 Training final EMD-LSTM model... Epoch 1/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 5s 58ms/step - loss: 0.0585 - mae: 0.1576 - val_loss: 0.0026 - val_mae: 0.0417 Epoch 2/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0128 - mae: 0.0910 - val_loss: 0.0028 - val_mae: 0.0422 Epoch 3/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0102 - mae: 0.0641 - val_loss: 0.0015 - val_mae: 0.0263 Epoch 4/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0081 - mae: 0.0507 - val_loss: 0.0018 - val_mae: 0.0287 Epoch 5/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0091 - mae: 0.0500 - val_loss: 0.0014 - val_mae: 0.0252 Epoch 6/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0100 - mae: 0.0554 - val_loss: 0.0016 - val_mae: 0.0279 Epoch 7/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0057 - mae: 0.0449 - val_loss: 0.0015 - val_mae: 0.0259 Epoch 8/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0091 - mae: 0.0538 - val_loss: 0.0014 - val_mae: 0.0252 Epoch 9/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0078 - mae: 0.0445 - val_loss: 0.0017 - val_mae: 0.0301 Epoch 10/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0065 - mae: 0.0470 - val_loss: 0.0013 - val_mae: 0.0250 Epoch 11/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0074 - mae: 0.0465 - val_loss: 0.0013 - val_mae: 0.0238 Epoch 12/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0079 - mae: 0.0472 - val_loss: 0.0014 - val_mae: 0.0267 Epoch 13/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0053 - mae: 0.0397 - val_loss: 0.0014 - val_mae: 0.0260 Epoch 14/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0058 - mae: 0.0433 - val_loss: 0.0012 - val_mae: 0.0241 Epoch 15/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0069 - mae: 0.0439 - val_loss: 0.0013 - val_mae: 0.0255 Epoch 16/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0068 - mae: 0.0429 - val_loss: 0.0012 - val_mae: 0.0242 Epoch 17/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0066 - mae: 0.0429 - val_loss: 0.0012 - val_mae: 0.0235 Epoch 18/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0061 - mae: 0.0406 - val_loss: 0.0011 - val_mae: 0.0234 Epoch 19/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0078 - mae: 0.0442 - val_loss: 0.0013 - val_mae: 0.0256 Epoch 20/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0072 - mae: 0.0450 - val_loss: 0.0014 - val_mae: 0.0275 Epoch 21/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - loss: 0.0050 - mae: 0.0406 - val_loss: 0.0010 - val_mae: 0.0216 Epoch 22/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0081 - mae: 0.0435 - val_loss: 0.0011 - val_mae: 0.0231 Epoch 23/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0052 - mae: 0.0369 - val_loss: 0.0014 - val_mae: 0.0283 Epoch 24/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0058 - mae: 0.0435 - val_loss: 9.7675e-04 - val_mae: 0.0210 Epoch 25/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0065 - mae: 0.0410 - val_loss: 0.0013 - val_mae: 0.0271 Epoch 26/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0042 - mae: 0.0345 - val_loss: 0.0014 - val_mae: 0.0284 Epoch 27/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0042 - mae: 0.0392 - val_loss: 0.0012 - val_mae: 0.0254 Epoch 28/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0049 - mae: 0.0343 - val_loss: 0.0017 - val_mae: 0.0325 Epoch 29/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0063 - mae: 0.0466 - val_loss: 9.1518e-04 - val_mae: 0.0209 Epoch 30/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0049 - mae: 0.0366 - val_loss: 0.0011 - val_mae: 0.0239 Epoch 31/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0049 - mae: 0.0362 - val_loss: 0.0016 - val_mae: 0.0311 Epoch 32/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0054 - mae: 0.0385 - val_loss: 8.1606e-04 - val_mae: 0.0197 Epoch 33/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0051 - mae: 0.0379 - val_loss: 8.5663e-04 - val_mae: 0.0207 Epoch 34/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0044 - mae: 0.0349 - val_loss: 0.0015 - val_mae: 0.0302 Epoch 35/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0052 - mae: 0.0390 - val_loss: 7.7333e-04 - val_mae: 0.0190 Epoch 36/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0039 - mae: 0.0322 - val_loss: 0.0014 - val_mae: 0.0297 Epoch 37/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0060 - mae: 0.0452 - val_loss: 7.4305e-04 - val_mae: 0.0187 Epoch 38/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0046 - mae: 0.0334 - val_loss: 0.0015 - val_mae: 0.0305 Epoch 39/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0043 - mae: 0.0370 - val_loss: 9.1028e-04 - val_mae: 0.0222 Epoch 40/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0046 - mae: 0.0372 - val_loss: 8.6741e-04 - val_mae: 0.0210 Epoch 41/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0042 - mae: 0.0318 - val_loss: 0.0011 - val_mae: 0.0245 Epoch 42/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0032 - mae: 0.0297 - val_loss: 7.3894e-04 - val_mae: 0.0193 Epoch 43/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0044 - mae: 0.0340 - val_loss: 9.4439e-04 - val_mae: 0.0229 Epoch 44/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0033 - mae: 0.0300 - val_loss: 9.4176e-04 - val_mae: 0.0227 Epoch 45/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0052 - mae: 0.0389 - val_loss: 8.0505e-04 - val_mae: 0.0203 Epoch 46/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0045 - mae: 0.0336 - val_loss: 7.6528e-04 - val_mae: 0.0197 Epoch 47/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0037 - mae: 0.0308 - val_loss: 7.8578e-04 - val_mae: 0.0205 Epoch 48/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0030 - mae: 0.0297 - val_loss: 8.7602e-04 - val_mae: 0.0219 Epoch 49/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0040 - mae: 0.0316 - val_loss: 6.5140e-04 - val_mae: 0.0179 Epoch 50/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0034 - mae: 0.0303 - val_loss: 9.1689e-04 - val_mae: 0.0227 Epoch 51/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0034 - mae: 0.0313 - val_loss: 7.6802e-04 - val_mae: 0.0204 Epoch 52/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0026 - mae: 0.0294 - val_loss: 6.1360e-04 - val_mae: 0.0175 Epoch 53/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0027 - mae: 0.0281 - val_loss: 9.3396e-04 - val_mae: 0.0231 Epoch 54/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0041 - mae: 0.0328 - val_loss: 5.4212e-04 - val_mae: 0.0161 Epoch 55/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0040 - mae: 0.0329 - val_loss: 7.0556e-04 - val_mae: 0.0195 Epoch 56/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0035 - mae: 0.0285 - val_loss: 5.5858e-04 - val_mae: 0.0167 Epoch 57/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - loss: 0.0030 - mae: 0.0267 - val_loss: 7.6100e-04 - val_mae: 0.0206 Epoch 58/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0037 - mae: 0.0299 - val_loss: 6.8217e-04 - val_mae: 0.0193 Epoch 59/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0027 - mae: 0.0284 - val_loss: 6.6307e-04 - val_mae: 0.0186 Epoch 60/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0020 - mae: 0.0247 - val_loss: 6.1766e-04 - val_mae: 0.0181 Epoch 61/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0037 - mae: 0.0317 - val_loss: 5.5153e-04 - val_mae: 0.0164 Epoch 62/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0034 - mae: 0.0303 - val_loss: 7.6360e-04 - val_mae: 0.0206 Epoch 63/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0029 - mae: 0.0274 - val_loss: 4.5563e-04 - val_mae: 0.0148 Epoch 64/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0032 - mae: 0.0287 - val_loss: 9.5250e-04 - val_mae: 0.0241 Epoch 65/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0025 - mae: 0.0288 - val_loss: 4.5510e-04 - val_mae: 0.0146 Epoch 66/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - loss: 0.0025 - mae: 0.0274 - val_loss: 6.7691e-04 - val_mae: 0.0194 Epoch 67/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0025 - mae: 0.0273 - val_loss: 4.8812e-04 - val_mae: 0.0157 Epoch 68/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0032 - mae: 0.0296 - val_loss: 4.3872e-04 - val_mae: 0.0146 Epoch 69/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0033 - mae: 0.0282 - val_loss: 8.4235e-04 - val_mae: 0.0226 Epoch 70/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0018 - mae: 0.0243 - val_loss: 4.0855e-04 - val_mae: 0.0142 Epoch 71/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0024 - mae: 0.0266 - val_loss: 4.4461e-04 - val_mae: 0.0150 Epoch 72/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0043 - mae: 0.0309 - val_loss: 6.5214e-04 - val_mae: 0.0193 Epoch 73/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0032 - mae: 0.0306 - val_loss: 3.7003e-04 - val_mae: 0.0133 Epoch 74/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0025 - mae: 0.0239 - val_loss: 5.9848e-04 - val_mae: 0.0186 Epoch 75/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0028 - mae: 0.0295 - val_loss: 3.4162e-04 - val_mae: 0.0128 Epoch 76/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0026 - mae: 0.0249 - val_loss: 5.3592e-04 - val_mae: 0.0171 Epoch 77/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0023 - mae: 0.0249 - val_loss: 4.0675e-04 - val_mae: 0.0144 Epoch 78/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0026 - mae: 0.0262 - val_loss: 3.3470e-04 - val_mae: 0.0128 Epoch 79/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - loss: 0.0036 - mae: 0.0299 - val_loss: 3.5309e-04 - val_mae: 0.0130 Epoch 80/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0026 - mae: 0.0268 - val_loss: 4.3947e-04 - val_mae: 0.0153 Epoch 81/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0024 - mae: 0.0282 - val_loss: 3.9156e-04 - val_mae: 0.0143 Epoch 82/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0015 - mae: 0.0220 - val_loss: 3.3303e-04 - val_mae: 0.0129 Epoch 83/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0022 - mae: 0.0246 - val_loss: 3.5768e-04 - val_mae: 0.0135 Epoch 84/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0023 - mae: 0.0236 - val_loss: 4.0562e-04 - val_mae: 0.0145 Epoch 85/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0022 - mae: 0.0242 - val_loss: 3.3461e-04 - val_mae: 0.0131 Epoch 86/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0028 - mae: 0.0264 - val_loss: 4.5306e-04 - val_mae: 0.0159 Epoch 87/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0026 - mae: 0.0264 - val_loss: 3.1444e-04 - val_mae: 0.0124 Epoch 88/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0025 - mae: 0.0242 - val_loss: 3.6463e-04 - val_mae: 0.0139 Epoch 89/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0018 - mae: 0.0231 - val_loss: 3.3915e-04 - val_mae: 0.0133 Epoch 90/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 0.0022 - mae: 0.0272 - val_loss: 3.2027e-04 - val_mae: 0.0134 Epoch 91/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0029 - mae: 0.0246 - val_loss: 4.8360e-04 - val_mae: 0.0167 Epoch 92/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0238 - val_loss: 3.3508e-04 - val_mae: 0.0133 Epoch 93/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0030 - mae: 0.0268 - val_loss: 2.6617e-04 - val_mae: 0.0117 Epoch 94/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0021 - mae: 0.0229 - val_loss: 2.6543e-04 - val_mae: 0.0116 Epoch 95/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0015 - mae: 0.0210 - val_loss: 4.7103e-04 - val_mae: 0.0166 Epoch 96/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0018 - mae: 0.0244 - val_loss: 2.8703e-04 - val_mae: 0.0124 Epoch 97/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0021 - mae: 0.0225 - val_loss: 4.7690e-04 - val_mae: 0.0169 Epoch 98/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0017 - mae: 0.0212 - val_loss: 2.9993e-04 - val_mae: 0.0124 Epoch 99/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0228 - val_loss: 2.7499e-04 - val_mae: 0.0118 Epoch 100/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - loss: 0.0021 - mae: 0.0239 - val_loss: 3.5554e-04 - val_mae: 0.0140 Epoch 101/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0023 - mae: 0.0249 - val_loss: 3.2368e-04 - val_mae: 0.0130 Epoch 102/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - loss: 0.0019 - mae: 0.0219 - val_loss: 3.2849e-04 - val_mae: 0.0131 Epoch 103/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0013 - mae: 0.0209 - val_loss: 2.4490e-04 - val_mae: 0.0110 Epoch 104/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0022 - mae: 0.0233 - val_loss: 2.5431e-04 - val_mae: 0.0113 Epoch 105/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0017 - mae: 0.0212 - val_loss: 5.1841e-04 - val_mae: 0.0182 Epoch 106/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0021 - mae: 0.0267 - val_loss: 3.1582e-04 - val_mae: 0.0141 Epoch 107/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0016 - mae: 0.0234 - val_loss: 2.5921e-04 - val_mae: 0.0117 Epoch 108/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - loss: 0.0019 - mae: 0.0228 - val_loss: 2.4556e-04 - val_mae: 0.0114 Epoch 109/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0015 - mae: 0.0217 - val_loss: 2.7566e-04 - val_mae: 0.0121 Epoch 110/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0015 - mae: 0.0213 - val_loss: 3.3272e-04 - val_mae: 0.0137 Epoch 111/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0227 - val_loss: 2.0712e-04 - val_mae: 0.0102 Epoch 112/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0014 - mae: 0.0207 - val_loss: 1.9954e-04 - val_mae: 0.0100 Epoch 113/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0015 - mae: 0.0203 - val_loss: 1.9679e-04 - val_mae: 0.0100 Epoch 114/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0224 - val_loss: 2.6282e-04 - val_mae: 0.0119 Epoch 115/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0014 - mae: 0.0195 - val_loss: 4.3449e-04 - val_mae: 0.0168 Epoch 116/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0013 - mae: 0.0218 - val_loss: 1.8536e-04 - val_mae: 0.0095 Epoch 117/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 0.0021 - mae: 0.0222 - val_loss: 3.5083e-04 - val_mae: 0.0144 Epoch 118/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0014 - mae: 0.0203 - val_loss: 2.2835e-04 - val_mae: 0.0109 Epoch 119/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0016 - mae: 0.0221 - val_loss: 3.1757e-04 - val_mae: 0.0137 Epoch 120/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0010 - mae: 0.0200 - val_loss: 2.0700e-04 - val_mae: 0.0104 Epoch 121/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0013 - mae: 0.0203 - val_loss: 1.7707e-04 - val_mae: 0.0095 Epoch 122/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0189 - val_loss: 2.0155e-04 - val_mae: 0.0109 Epoch 123/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0018 - mae: 0.0214 - val_loss: 7.8670e-04 - val_mae: 0.0243 Epoch 124/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - loss: 0.0012 - mae: 0.0216 - val_loss: 1.6159e-04 - val_mae: 0.0089 Epoch 125/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0019 - mae: 0.0228 - val_loss: 1.6125e-04 - val_mae: 0.0091 Epoch 126/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0187 - val_loss: 1.8770e-04 - val_mae: 0.0099 Epoch 127/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0012 - mae: 0.0194 - val_loss: 1.5224e-04 - val_mae: 0.0088 Epoch 128/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0012 - mae: 0.0191 - val_loss: 4.8986e-04 - val_mae: 0.0185 Epoch 129/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.9127e-04 - mae: 0.0194 - val_loss: 1.4994e-04 - val_mae: 0.0085 Epoch 130/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0188 - val_loss: 1.3761e-04 - val_mae: 0.0085 Epoch 131/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0012 - mae: 0.0192 - val_loss: 2.1265e-04 - val_mae: 0.0110 Epoch 132/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 0.0012 - mae: 0.0199 - val_loss: 2.2934e-04 - val_mae: 0.0113 Epoch 133/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0014 - mae: 0.0207 - val_loss: 1.3606e-04 - val_mae: 0.0084 Epoch 134/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0177 - val_loss: 1.4136e-04 - val_mae: 0.0087 Epoch 135/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0186 - val_loss: 1.3536e-04 - val_mae: 0.0082 Epoch 136/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 9.1539e-04 - mae: 0.0169 - val_loss: 4.2233e-04 - val_mae: 0.0166 Epoch 137/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0014 - mae: 0.0214 - val_loss: 1.2505e-04 - val_mae: 0.0080 Epoch 138/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0013 - mae: 0.0186 - val_loss: 1.5058e-04 - val_mae: 0.0090 Epoch 139/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 7.2309e-04 - mae: 0.0169 - val_loss: 1.2093e-04 - val_mae: 0.0079 Epoch 140/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0012 - mae: 0.0183 - val_loss: 1.1873e-04 - val_mae: 0.0078 Epoch 141/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0012 - mae: 0.0171 - val_loss: 1.2033e-04 - val_mae: 0.0080 Epoch 142/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.6240e-04 - mae: 0.0167 - val_loss: 1.1982e-04 - val_mae: 0.0080 Epoch 143/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0012 - mae: 0.0185 - val_loss: 3.3373e-04 - val_mae: 0.0147 Epoch 144/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.4580e-04 - mae: 0.0179 - val_loss: 2.5442e-04 - val_mae: 0.0125 Epoch 145/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 9.7754e-04 - mae: 0.0171 - val_loss: 1.3433e-04 - val_mae: 0.0085 Epoch 146/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 0.0013 - mae: 0.0186 - val_loss: 1.0841e-04 - val_mae: 0.0076 Epoch 147/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0181 - val_loss: 1.2146e-04 - val_mae: 0.0081 Epoch 148/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 9.8543e-04 - mae: 0.0166 - val_loss: 1.0910e-04 - val_mae: 0.0077 Epoch 149/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0178 - val_loss: 2.5051e-04 - val_mae: 0.0123 Epoch 150/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0013 - mae: 0.0195 - val_loss: 1.8388e-04 - val_mae: 0.0101 Epoch 151/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 9.5832e-04 - mae: 0.0163 - val_loss: 1.1106e-04 - val_mae: 0.0075 Epoch 152/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - loss: 0.0015 - mae: 0.0189 - val_loss: 1.6470e-04 - val_mae: 0.0096 Epoch 153/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 6.8445e-04 - mae: 0.0157 - val_loss: 2.8354e-04 - val_mae: 0.0134 Epoch 154/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0186 - val_loss: 1.1186e-04 - val_mae: 0.0077 Epoch 155/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.8480e-04 - mae: 0.0160 - val_loss: 1.3828e-04 - val_mae: 0.0086 Epoch 156/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0011 - mae: 0.0169 - val_loss: 2.5993e-04 - val_mae: 0.0128 Epoch 157/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0014 - mae: 0.0183 - val_loss: 3.0558e-04 - val_mae: 0.0144 Epoch 158/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 0.0010 - mae: 0.0178 - val_loss: 9.1795e-05 - val_mae: 0.0069 Epoch 159/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.1813e-04 - mae: 0.0158 - val_loss: 1.4611e-04 - val_mae: 0.0099 Epoch 160/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0011 - mae: 0.0178 - val_loss: 1.6432e-04 - val_mae: 0.0106 Epoch 161/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0010 - mae: 0.0174 - val_loss: 1.1594e-04 - val_mae: 0.0077 Epoch 162/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 7.0594e-04 - mae: 0.0145 - val_loss: 1.1772e-04 - val_mae: 0.0079 Epoch 163/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 6.1016e-04 - mae: 0.0144 - val_loss: 2.0367e-04 - val_mae: 0.0112 Epoch 164/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - loss: 0.0011 - mae: 0.0183 - val_loss: 1.2677e-04 - val_mae: 0.0083 Epoch 165/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 5.8384e-04 - mae: 0.0136 - val_loss: 1.0950e-04 - val_mae: 0.0075 Epoch 166/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.0814e-04 - mae: 0.0167 - val_loss: 9.1837e-05 - val_mae: 0.0070 Epoch 167/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 9.7039e-04 - mae: 0.0156 - val_loss: 9.4053e-05 - val_mae: 0.0072 Epoch 168/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 0.0010 - mae: 0.0163 - val_loss: 1.1507e-04 - val_mae: 0.0076 Epoch 169/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 9.1908e-04 - mae: 0.0159 - val_loss: 1.2428e-04 - val_mae: 0.0081 Epoch 170/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.6696e-04 - mae: 0.0161 - val_loss: 8.2288e-05 - val_mae: 0.0067 Epoch 171/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.2600e-04 - mae: 0.0161 - val_loss: 1.0296e-04 - val_mae: 0.0077 Epoch 172/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 9.2578e-04 - mae: 0.0164 - val_loss: 9.2268e-05 - val_mae: 0.0070 Epoch 173/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 6.7954e-04 - mae: 0.0145 - val_loss: 2.4366e-04 - val_mae: 0.0126 Epoch 174/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 6.1940e-04 - mae: 0.0155 - val_loss: 1.0093e-04 - val_mae: 0.0070 Epoch 175/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - loss: 7.1997e-04 - mae: 0.0147 - val_loss: 2.2690e-04 - val_mae: 0.0120 Epoch 176/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 7.7937e-04 - mae: 0.0164 - val_loss: 1.3270e-04 - val_mae: 0.0086 Epoch 177/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 8.9832e-04 - mae: 0.0173 - val_loss: 7.6299e-05 - val_mae: 0.0064 Epoch 178/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 7.7817e-04 - mae: 0.0156 - val_loss: 7.6378e-05 - val_mae: 0.0064 Epoch 179/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.0183e-04 - mae: 0.0141 - val_loss: 7.9451e-05 - val_mae: 0.0065 Epoch 180/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 6.6811e-04 - mae: 0.0144 - val_loss: 9.9020e-05 - val_mae: 0.0072 Epoch 181/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.8176e-04 - mae: 0.0157 - val_loss: 3.4142e-04 - val_mae: 0.0157 Epoch 182/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0015 - mae: 0.0204 - val_loss: 8.6025e-05 - val_mae: 0.0066 Epoch 183/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 7.2775e-04 - mae: 0.0143 - val_loss: 9.6719e-05 - val_mae: 0.0072 Epoch 184/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 5.7785e-04 - mae: 0.0141 - val_loss: 7.7405e-05 - val_mae: 0.0065 Epoch 185/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 8.1382e-04 - mae: 0.0151 - val_loss: 9.5966e-05 - val_mae: 0.0070 Epoch 186/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 5.9626e-04 - mae: 0.0144 - val_loss: 2.2421e-04 - val_mae: 0.0123 Epoch 187/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 8.4605e-04 - mae: 0.0164 - val_loss: 1.4574e-04 - val_mae: 0.0094 Epoch 188/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 9.7219e-04 - mae: 0.0162 - val_loss: 8.9095e-05 - val_mae: 0.0070 Epoch 189/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 6.9759e-04 - mae: 0.0147 - val_loss: 1.0607e-04 - val_mae: 0.0076 Epoch 190/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 8.6061e-04 - mae: 0.0159 - val_loss: 9.9451e-05 - val_mae: 0.0072 Epoch 191/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - loss: 7.5490e-04 - mae: 0.0159 - val_loss: 7.2481e-05 - val_mae: 0.0060 Epoch 192/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.4144e-04 - mae: 0.0149 - val_loss: 1.2695e-04 - val_mae: 0.0096 Epoch 193/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 7.1294e-04 - mae: 0.0153 - val_loss: 7.7313e-05 - val_mae: 0.0062 Epoch 194/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0011 - mae: 0.0165 - val_loss: 2.1930e-04 - val_mae: 0.0123 Epoch 195/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 6.8209e-04 - mae: 0.0154 - val_loss: 1.7889e-04 - val_mae: 0.0109 Epoch 196/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - loss: 9.1040e-04 - mae: 0.0175 - val_loss: 5.9942e-05 - val_mae: 0.0056 Epoch 197/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 6.3125e-04 - mae: 0.0130 - val_loss: 1.7965e-04 - val_mae: 0.0110 Epoch 198/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 8.8768e-04 - mae: 0.0171 - val_loss: 1.1937e-04 - val_mae: 0.0082 Epoch 199/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 9.5758e-04 - mae: 0.0166 - val_loss: 7.4636e-05 - val_mae: 0.0060 Epoch 200/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - loss: 5.0818e-04 - mae: 0.0128 - val_loss: 2.2375e-04 - val_mae: 0.0127 4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 111ms/step ============================================================ EMD-LSTM MODEL TRAINING SUMMARY ============================================================ Final epochs trained: 200 Best validation loss: 0.0001 Best validation MAE: 0.0056 Lookback period: 52 weeks Number of IMFs used: 5 LSTM Model Architecture:
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ lstm (LSTM) │ (None, 32) │ 4,352 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout (Dropout) │ (None, 32) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense (Dense) │ (None, 1) │ 33 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 13,157 (51.40 KB)
Trainable params: 4,385 (17.13 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 8,772 (34.27 KB)
============================================================ EVALUATION ON RECONSTRUCTED DATA ============================================================ MSE: 3562.6696 RMSE: 59.6881 MAE: 44.4461 MAPE: 0.03% R²: 0.9789 Directional Accuracy: 91.09% ============================================================ EVALUATION ON ORIGINAL DATA ============================================================ MSE: 25499.4386 RMSE: 159.6854 MAE: 113.4441 MAPE: 0.07% R²: 0.8579 Directional Accuracy: 29.70%
<Figure size 1800x1200 with 0 Axes>
EMD-LSTM Residual Analysis: Residual mean: 33.3420 Residual std: 156.1658 Residual min: -509.8165 Residual max: 655.3623 ================================================== FUTURE FORECAST - EMD-LSTM MODEL (NEXT 12 WEEKS) ================================================== 2024-11-03: 2103.75 2024-11-10: 2001.36 2024-11-17: 1899.58 2024-11-24: 1822.62 2024-12-01: 1770.56 2024-12-08: 1736.21 2024-12-15: 1712.98 2024-12-22: 1696.06 2024-12-29: 1681.84 2025-01-05: 1667.67 2025-01-12: 1651.91 2025-01-19: 1633.96
============================================================ EMD-LSTM MODEL ADVANTAGES ============================================================ 1. Adaptive Decomposition: EMD adapts to data characteristics 2. Multi-scale Analysis: Captures patterns at different time scales 3. Long-term Memory: LSTM handles long-term dependencies effectively 4. Noise Reduction: Removes high-frequency noise effectively 5. Interpretability: IMFs provide insight into data components 6. Non-linear Handling: Effective for non-stationary, non-linear data 7. Robust Forecasting: Combines EMD's decomposition with LSTM's sequence learning 8. Gate Mechanism: LSTM's gates control information flow for better learning 9. Vanishing Gradient Solution: LSTM handles long sequences better than simple RNNs 10. Complex Pattern Capture: Better for capturing complex temporal patterns
In [40]:
# --- Step 11: Detailed Model Configuration Report & JSON Export (EMD-LSTM) ---
import json
import datetime
import platform
from tensorflow.keras import backend as K
print("\n" + "="*60)
print("EMD + LSTM MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)
report = {}
# Optimizer details
try:
opt = final_model.optimizer
opt_name = opt.__class__.__name__
try:
lr_val = float(K.get_value(opt.learning_rate))
except Exception:
opt_cfg = opt.get_config()
lr_val = opt_cfg.get('learning_rate', None)
try:
lr_val = float(lr_val)
except Exception:
pass
report['optimizer'] = opt_name
report['learning_rate'] = lr_val
print(f"Optimizer: {opt_name}")
print(f"Learning Rate: {lr_val}")
except Exception as e:
report['optimizer'] = str(e)
print(f"Could not fetch optimizer details: {e}")
# Hyperparameters
try:
report['best_hyperparameters'] = best_hp.values
print("\nBest Hyperparameters (from tuner):")
for k, v in best_hp.values.items():
print(f" {k}: {v}")
except Exception as e:
report['best_hyperparameters'] = None
print(f"No best hyperparameters found: {e}")
# Model layers
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
layer_info = {
'index': i+1,
'class_name': layer.__class__.__name__,
'name': layer.name
}
if hasattr(layer, 'units'):
layer_info['units'] = getattr(layer, 'units', None)
print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
else:
print(f" Layer {i+1}: {layer.__class__.__name__}")
if hasattr(layer, 'activation'):
try:
layer_info['activation'] = layer.activation.__name__
except Exception:
layer_info['activation'] = str(layer.activation)
if hasattr(layer, 'rate'):
layer_info['dropout_rate'] = getattr(layer, 'rate', None)
if hasattr(layer, 'return_sequences'):
layer_info['return_sequences'] = getattr(layer, 'return_sequences', None)
try:
layer_info['input_shape'] = layer.input_shape
layer_info['output_shape'] = layer.output_shape
except:
layer_info['input_shape'] = None
layer_info['output_shape'] = None
layers_report.append(layer_info)
report['layers'] = layers_report
# Training summary
training_summary = {
'lookback': lookback,
'epochs_trained': len(history.history['loss']),
'final_training_loss': float(history.history['loss'][-1]),
'final_validation_loss': float(min(history.history['val_loss'])),
'final_training_mae': float(history.history['mae'][-1]) if 'mae' in history.history else None,
'final_validation_mae': float(min(history.history['val_mae'])) if 'val_mae' in history.history else None,
'num_imfs_used': len(imfs)
}
report['training_summary'] = training_summary
print("\nTraining Summary:")
for k, v in training_summary.items():
print(f" {k}: {v}")
# Evaluation
try:
report['evaluation_metrics'] = {
'reconstructed': metrics_reconstructed,
'original': metrics_original
}
print("\nEvaluation Metrics attached.")
except Exception as e:
report['evaluation_metrics'] = None
print(f"Could not attach evaluation metrics: {e}")
# Residual stats
try:
residuals_stats = {
'residual_mean': float(np.mean(residuals)),
'residual_std': float(np.std(residuals)),
'residual_min': float(np.min(residuals)),
'residual_max': float(np.max(residuals))
}
report['residuals'] = residuals_stats
print("\nResiduals Summary attached.")
except Exception as e:
print(f"Residual stats failed: {e}")
# Future forecast (if available)
try:
forecast_report = {
'dates': [str(d) for d in future_dates],
'forecasted_prices': [float(p) for p in future_prices]
}
report['future_forecast'] = forecast_report
print("\nFuture forecast added to report.")
except Exception as e:
report['future_forecast'] = None
print(f"Future forecast not added: {e}")
# Metadata
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__
# Save JSON
report_filename = "emd_lstm_report.json"
with open(report_filename, "w", encoding="utf-8") as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nSaved detailed report to: {report_filename}")
print("="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================ EMD + LSTM MODEL CONFIGURATION & TRAINING REPORT ============================================================ Optimizer: Adam Learning Rate: 0.0009511933894827962 Best Hyperparameters (from tuner): num_layers: 1 units_0: 32 dropout_0: 0.1 dense_layers: 0 learning_rate: 0.0009511933717016039 units_1: 160 dropout_1: 0.30000000000000004 dense_units_0: 128 dense_dropout_0: 0.4 dense_units_1: 16 dense_dropout_1: 0.4 units_2: 32 dropout_2: 0.1 Model Layers: Layer 1: LSTM - units: 32 Layer 2: Dropout Layer 3: Dense - units: 1 Training Summary: lookback: 52 epochs_trained: 200 final_training_loss: 0.0008889764430932701 final_validation_loss: 6.141273479443043e-05 final_training_mae: 0.015297123230993748 final_validation_mae: 0.005484431982040405 num_imfs_used: 5 Evaluation Metrics attached. Residuals Summary attached. Future forecast added to report. Saved detailed report to: emd_lstm_report.json ============================================================ REPORT COMPLETE ============================================================
In [41]:
print("Performing EMD decomposition...")
emd = EMD()
imfs = emd.emd(data, max_imf=5) # Extract up to 5 IMFs
# Plot IMFs
plt.figure(figsize=(16, 4*(len(imfs)+1)))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r', linewidth=2)
plt.title("Original Time Series")
plt.grid()
for i, imf in enumerate(imfs):
plt.subplot(len(imfs)+1, 1, i+2)
plt.plot(df.index, imf, 'g')
plt.title(f"IMF {i+1} (Variance: {np.var(imf):.2f})")
plt.grid()
plt.tight_layout()
# Save the figure as PNG (high resolution)
plt.savefig("emd_decomposition3.png", dpi=300)
plt.show()
Performing EMD decomposition...
In [28]:
plt.figure(figsize=(14, 6))
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (LSTM)', color='red', linestyle='--', linewidth=2) # Changed to LSTM
plt.title('Actual vs Predicted - EMD-LSTM Model (Test Period)') # Changed to LSTM
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/elstR_result211.png", dpi=300, bbox_inches='tight')
plt.show()
<Figure size 1400x600 with 0 Axes>
In [ ]:
In [55]:
############
-------EMD + GRU ----¶
============ IMPORT LIBRARIES ============¶
In [30]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PyEMD import EMD
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import tensorflow as tf
# Suppress warnings
warnings.filterwarnings("ignore")
df=pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx",parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
print(f"Original data length: {len(data)}")
# --- Step 1: EMD Decomposition ---
print("Performing EMD decomposition...")
emd = EMD()
imfs = emd.emd(data, max_imf=5) # Extract up to 5 IMFs
# Plot IMFs
plt.figure(figsize=(16, 4*(len(imfs)+1)))
plt.subplot(len(imfs)+1, 1, 1)
plt.plot(df.index, data, 'r', linewidth=2)
plt.title("Original Time Series")
plt.grid()
for i, imf in enumerate(imfs):
plt.subplot(len(imfs)+1, 1, i+2)
plt.plot(df.index, imf, 'g')
plt.title(f"IMF {i+1} (Variance: {np.var(imf):.2f})")
plt.grid()
plt.tight_layout()
plt.show()
# Filter meaningful IMFs (remove low-variance components)
imfs = [imf for imf in imfs if np.var(imf) > 0.05*np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")
# Reconstruct signal from selected IMFs
reconstructed_data = np.sum(imfs, axis=0)
# Plot original vs reconstructed data
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original', alpha=0.7)
plt.plot(df.index, reconstructed_data, label='EMD Reconstructed', linewidth=2)
plt.title("Original vs EMD-Reconstructed Time Series")
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result1.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 2: Data Preparation for GRU ---
# Use reconstructed data for training
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(reconstructed_data.reshape(-1, 1))
# Create sequences for GRU
def create_sequences(data, lookback=52):
X, y = [], []
for i in range(lookback, len(data)):
X.append(data[i-lookback:i])
y.append(data[i])
return np.array(X), np.array(y)
lookback = 52 # 52 weeks lookback
X, y = create_sequences(scaled_data, lookback)
# Reshape for GRU [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
# Train/Val/Test split
total_size = len(X)
train_size = int(total_size * 0.7)
val_size = int(total_size * 0.15)
test_size = total_size - train_size - val_size
X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")
# --- Step 3: Hyperparameter Tuning for GRU ---
def build_model(hp):
model = Sequential()
# Number of GRU layers
for i in range(hp.Int('num_layers', 1, 3)):
model.add(GRU(
units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None
))
model.add(Dropout(hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
# Dense layers with ReLU activation
for i in range(hp.Int('dense_layers', 0, 2)):
model.add(Dense(
units=hp.Int(f'dense_units_{i}', min_value=16, max_value=128, step=16),
activation='relu'
))
model.add(Dropout(hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))
model.add(Dense(1, activation='linear'))
model.compile(
optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
loss='mse',
metrics=['mae']
)
return model
print("\nStarting GRU hyperparameter tuning...")
tuner = RandomSearch(
build_model,
objective='val_loss',
max_trials=15,
executions_per_trial=2,
directory='emd_gru_tuning',
project_name='cardamom_emd_gru'
)
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
tuner.search(
X_train, y_train,
epochs=100,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# Get best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Number of GRU layers: {best_hp.get('num_layers')}")
print(f"Learning rate: {best_hp.get('learning_rate')}")
for i in range(best_hp.get('num_layers')):
print(f"GRU layer {i+1} units: {best_hp.get(f'units_{i}')}")
print(f"GRU layer {i+1} dropout: {best_hp.get(f'dropout_{i}')}")
# --- Step 4: Build and Train Final GRU Model ---
final_model = tuner.hypermodel.build(best_hp)
print("\nTraining final EMD-GRU model...")
history = final_model.fit(
X_train, y_train,
epochs=200,
batch_size=32,
validation_data=(X_val, y_val),
callbacks=[early_stopping],
verbose=1
)
# --- Step 5: Forecasting ---
# Predict on test set
y_pred_scaled = final_model.predict(X_test).flatten()
# Inverse transform predictions
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
# Get actual values (original scale)
y_actual_original = data[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
y_actual_reconstructed = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
# --- Step 6: Evaluation ---
def evaluate_forecast(actual, forecast):
"""Comprehensive forecast evaluation"""
mse = mean_squared_error(actual, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, forecast)
mape = mean_absolute_percentage_error(actual, forecast)
r2 = r2_score(actual, forecast)
# Directional accuracy
actual_diff = np.sign(np.diff(actual))
forecast_diff = np.sign(np.diff(forecast))
da = (np.sum(actual_diff == forecast_diff) / len(actual_diff)) * 100
return {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'MAPE': mape,
'R²': r2,
'Directional Accuracy': da
}
# Evaluate on reconstructed data
metrics_reconstructed = evaluate_forecast(y_actual_reconstructed, y_pred)
# Evaluate on original data
metrics_original = evaluate_forecast(y_actual_original, y_pred)
print("\n" + "="*60)
print("EMD-GRU MODEL TRAINING SUMMARY")
print("="*60)
print(f"Final epochs trained: {len(history.history['loss'])}")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Best validation MAE: {min(history.history['val_mae']):.4f}")
print(f"Lookback period: {lookback} weeks")
print(f"Number of IMFs used: {len(imfs)}")
print("\nGRU Model Architecture:")
final_model.summary()
print("\n" + "="*60)
print("EVALUATION ON RECONSTRUCTED DATA")
print("="*60)
for metric, value in metrics_reconstructed.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
print("\n" + "="*60)
print("EVALUATION ON ORIGINAL DATA")
print("="*60)
for metric, value in metrics_original.items():
if metric == 'MAPE':
print(f"{metric}: {value:.2f}%")
elif metric == 'Directional Accuracy':
print(f"{metric}: {value:.2f}%")
else:
print(f"{metric}: {value:.4f}")
# --- Step 7: Visualization ---
test_dates = df.index[train_size+val_size+lookback:train_size+val_size+lookback+len(y_test)]
# Plot 1: Training History
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('EMD-GRU Training History')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result2.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 2: Original vs Reconstructed vs Forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label='Original Data', alpha=0.7, color='blue')
plt.plot(df.index, reconstructed_data, label='EMD Reconstructed', color='green', linewidth=2)
plt.plot(test_dates, y_pred, label='GRU Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(test_dates[0], color='gray', linestyle='--', label='Test Start')
plt.title('Original vs EMD-Reconstructed vs GRU Forecast')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result3.png", dpi=300, bbox_inches='tight')
plt.show()
# Plot 3: Separate View - Actual vs Predicted
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2)
plt.fill_between(test_dates,
y_pred - metrics_original['RMSE'],
y_pred + metrics_original['RMSE'],
alpha=0.2, color='red', label='± RMSE')
plt.title('Actual vs Predicted - EMD-GRU Model (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result4.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 8: Individual IMF Analysis ---
plt.figure(figsize=(16, 4*len(imfs)))
for i, imf in enumerate(imfs):
plt.subplot(len(imfs), 1, i+1)
plt.plot(df.index, imf, color=['blue', 'green', 'red', 'purple', 'orange'][i % 5])
plt.title(f'IMF {i+1} (Variance: {np.var(imf):.4f})')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result5.png", dpi=300, bbox_inches='tight')
plt.show()
# --- Step 9: Residual Analysis ---
residuals = y_actual_original - y_pred
plt.figure(figsize=(12, 6))
plt.plot(test_dates, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title('EMD-GRU Residuals Over Time')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result6.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
plt.axvline(0, color='red', linestyle='--')
plt.title('EMD-GRU Residual Distribution')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result7.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result8.png", dpi=300, bbox_inches='tight')
plt.show()
plt.figure(figsize=(12, 6))
plt.scatter(y_actual_original, residuals, alpha=0.6)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Actual Values')
plt.ylabel('Residuals')
plt.title('Residuals vs Actual')
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result9.png", dpi=300, bbox_inches='tight')
plt.show()
print("\nEMD-GRU Residual Analysis:")
print(f"Residual mean: {residuals.mean():.4f}")
print(f"Residual std: {residuals.std():.4f}")
print(f"Residual min: {residuals.min():.4f}")
print(f"Residual max: {residuals.max():.4f}")
# --- Step 10: Future Forecasting ---
def forecast_future(model, last_sequence, scaler, steps=12):
"""Forecast future values"""
forecasts = []
current_sequence = last_sequence.copy()
for _ in range(steps):
prediction = model.predict(current_sequence.reshape(1, lookback, 1), verbose=0)[0, 0]
forecasts.append(prediction)
# Update sequence
new_sequence = np.vstack([current_sequence[1:], [[prediction]]])
current_sequence = new_sequence
# Inverse transform
forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(last_date + pd.Timedelta(days=7), periods=steps, freq='W')
return future_dates, forecasts
# Forecast next 12 weeks
try:
last_sequence = scaled_data[-lookback:]
future_dates, future_prices = forecast_future(final_model, last_sequence, scaler, steps=12)
print("\n" + "="*50)
print("FUTURE FORECAST - EMD-GRU MODEL (NEXT 12 WEEKS)")
print("="*50)
for date, price in zip(future_dates, future_prices):
print(f"{date.strftime('%Y-%m-%d')}: {price:.2f}")
# Plot future forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index[-100:], data[-100:], label='Historical Data', color='blue')
plt.plot(future_dates, future_prices, label='EMD-GRU Future Forecast', color='red', linestyle='--', linewidth=2)
plt.axvline(df.index[-1], color='gray', linestyle='--', label='Forecast Start')
plt.title('EMD-GRU Future Price Forecast (Next 12 Weeks)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result11.png", dpi=300, bbox_inches='tight')
plt.show()
except Exception as e:
print(f"Future forecasting failed: {e}")
# --- EMD-GRU Benefits Summary ---
print("\n" + "="*60)
print("EMD-GRU MODEL ADVANTAGES")
print("="*60)
print("1. Adaptive Decomposition: EMD adapts to data characteristics")
print("2. Multi-scale Analysis: Captures patterns at different time scales")
print("3. Noise Reduction: Removes high-frequency noise effectively")
print("4. GRU Efficiency: Faster training than LSTM with similar performance")
print("5. Interpretability: IMFs provide insight into data components")
print("6. Non-linear Handling: Effective for non-stationary, non-linear data")
print("7. Data-driven: No predefined basis functions needed")
Original data length: 722 Performing EMD decomposition...
Selected 5 meaningful IMFs
Training sequences: (468, 52, 1) Validation sequences: (100, 52, 1) Test sequences: (102, 52, 1) Starting GRU hyperparameter tuning... Reloading Tuner from emd_gru_tuning\cardamom_emd_gru\tuner0.json Best Hyperparameters: Number of GRU layers: 1 Learning rate: 0.0019911924591572727 GRU layer 1 units: 160 GRU layer 1 dropout: 0.30000000000000004 Training final EMD-GRU model... Epoch 1/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 6s 81ms/step - loss: 0.0293 - mae: 0.1179 - val_loss: 0.0023 - val_mae: 0.0411 Epoch 2/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0111 - mae: 0.0539 - val_loss: 0.0015 - val_mae: 0.0323 Epoch 3/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0046 - mae: 0.0360 - val_loss: 5.6078e-04 - val_mae: 0.0173 Epoch 4/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0057 - mae: 0.0351 - val_loss: 5.9899e-04 - val_mae: 0.0185 Epoch 5/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0053 - mae: 0.0338 - val_loss: 5.2553e-04 - val_mae: 0.0161 Epoch 6/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0038 - mae: 0.0293 - val_loss: 4.0579e-04 - val_mae: 0.0150 Epoch 7/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0037 - mae: 0.0271 - val_loss: 4.4850e-04 - val_mae: 0.0159 Epoch 8/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0032 - mae: 0.0270 - val_loss: 3.0549e-04 - val_mae: 0.0123 Epoch 9/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0022 - mae: 0.0243 - val_loss: 2.7703e-04 - val_mae: 0.0119 Epoch 10/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 0.0030 - mae: 0.0275 - val_loss: 4.6843e-04 - val_mae: 0.0176 Epoch 11/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0030 - mae: 0.0305 - val_loss: 6.8287e-04 - val_mae: 0.0224 Epoch 12/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0022 - mae: 0.0278 - val_loss: 6.6151e-04 - val_mae: 0.0219 Epoch 13/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0023 - mae: 0.0284 - val_loss: 4.6612e-04 - val_mae: 0.0179 Epoch 14/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0024 - mae: 0.0279 - val_loss: 4.6447e-04 - val_mae: 0.0180 Epoch 15/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0022 - mae: 0.0260 - val_loss: 6.3321e-04 - val_mae: 0.0218 Epoch 16/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0025 - mae: 0.0259 - val_loss: 1.5348e-04 - val_mae: 0.0091 Epoch 17/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0028 - mae: 0.0253 - val_loss: 1.3854e-04 - val_mae: 0.0084 Epoch 18/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0012 - mae: 0.0186 - val_loss: 2.2181e-04 - val_mae: 0.0119 Epoch 19/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0015 - mae: 0.0250 - val_loss: 2.7304e-04 - val_mae: 0.0131 Epoch 20/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0017 - mae: 0.0224 - val_loss: 1.8188e-04 - val_mae: 0.0103 Epoch 21/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0011 - mae: 0.0184 - val_loss: 1.8933e-04 - val_mae: 0.0115 Epoch 22/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0014 - mae: 0.0218 - val_loss: 1.1561e-04 - val_mae: 0.0078 Epoch 23/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0017 - mae: 0.0206 - val_loss: 3.0882e-04 - val_mae: 0.0149 Epoch 24/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0017 - mae: 0.0219 - val_loss: 9.9181e-05 - val_mae: 0.0072 Epoch 25/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0014 - mae: 0.0200 - val_loss: 1.8548e-04 - val_mae: 0.0115 Epoch 26/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0011 - mae: 0.0191 - val_loss: 8.9581e-05 - val_mae: 0.0069 Epoch 27/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0012 - mae: 0.0196 - val_loss: 9.8907e-05 - val_mae: 0.0074 Epoch 28/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0013 - mae: 0.0188 - val_loss: 1.8344e-04 - val_mae: 0.0112 Epoch 29/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0010 - mae: 0.0186 - val_loss: 3.1848e-04 - val_mae: 0.0155 Epoch 30/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0015 - mae: 0.0217 - val_loss: 1.0439e-04 - val_mae: 0.0078 Epoch 31/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0011 - mae: 0.0184 - val_loss: 1.4396e-04 - val_mae: 0.0095 Epoch 32/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 0.0011 - mae: 0.0197 - val_loss: 3.8981e-04 - val_mae: 0.0174 Epoch 33/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 9.8743e-04 - mae: 0.0189 - val_loss: 4.1721e-04 - val_mae: 0.0171 Epoch 34/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 40ms/step - loss: 7.9451e-04 - mae: 0.0170 - val_loss: 9.5853e-05 - val_mae: 0.0076 Epoch 35/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.7573e-04 - mae: 0.0148 - val_loss: 1.1386e-04 - val_mae: 0.0083 Epoch 36/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0011 - mae: 0.0190 - val_loss: 1.0238e-04 - val_mae: 0.0080 Epoch 37/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.0166e-04 - mae: 0.0159 - val_loss: 2.1911e-04 - val_mae: 0.0126 Epoch 38/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.8838e-04 - mae: 0.0175 - val_loss: 1.2841e-04 - val_mae: 0.0086 Epoch 39/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.7967e-04 - mae: 0.0160 - val_loss: 2.8344e-04 - val_mae: 0.0140 Epoch 40/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 9.1914e-04 - mae: 0.0195 - val_loss: 8.2697e-05 - val_mae: 0.0070 Epoch 41/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 9.0963e-04 - mae: 0.0170 - val_loss: 2.2206e-04 - val_mae: 0.0122 Epoch 42/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 0.0012 - mae: 0.0195 - val_loss: 1.0506e-04 - val_mae: 0.0079 Epoch 43/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 40ms/step - loss: 8.5566e-04 - mae: 0.0167 - val_loss: 7.0648e-05 - val_mae: 0.0063 Epoch 44/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 6.4027e-04 - mae: 0.0150 - val_loss: 7.5281e-05 - val_mae: 0.0065 Epoch 45/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - loss: 7.7940e-04 - mae: 0.0162 - val_loss: 7.5000e-05 - val_mae: 0.0067 Epoch 46/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.9797e-04 - mae: 0.0153 - val_loss: 6.6143e-05 - val_mae: 0.0062 Epoch 47/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 9.7651e-04 - mae: 0.0178 - val_loss: 7.6148e-05 - val_mae: 0.0067 Epoch 48/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 5.7434e-04 - mae: 0.0144 - val_loss: 1.2699e-04 - val_mae: 0.0086 Epoch 49/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.7892e-04 - mae: 0.0168 - val_loss: 7.3928e-05 - val_mae: 0.0065 Epoch 50/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 45ms/step - loss: 7.6537e-04 - mae: 0.0143 - val_loss: 2.4773e-04 - val_mae: 0.0134 Epoch 51/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 6.4867e-04 - mae: 0.0164 - val_loss: 8.9058e-05 - val_mae: 0.0075 Epoch 52/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - loss: 7.3921e-04 - mae: 0.0160 - val_loss: 1.5230e-04 - val_mae: 0.0109 Epoch 53/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 8.3437e-04 - mae: 0.0172 - val_loss: 3.1185e-04 - val_mae: 0.0159 Epoch 54/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 8.9073e-04 - mae: 0.0186 - val_loss: 6.0320e-05 - val_mae: 0.0059 Epoch 55/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 8.2049e-04 - mae: 0.0171 - val_loss: 9.8044e-05 - val_mae: 0.0081 Epoch 56/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.0483e-04 - mae: 0.0146 - val_loss: 5.6578e-05 - val_mae: 0.0057 Epoch 57/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 7.0991e-04 - mae: 0.0161 - val_loss: 7.0625e-05 - val_mae: 0.0064 Epoch 58/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 7.0692e-04 - mae: 0.0169 - val_loss: 9.1418e-05 - val_mae: 0.0070 Epoch 59/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.3067e-04 - mae: 0.0154 - val_loss: 5.5248e-05 - val_mae: 0.0057 Epoch 60/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 7.5652e-04 - mae: 0.0160 - val_loss: 6.7263e-05 - val_mae: 0.0063 Epoch 61/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 6.4285e-04 - mae: 0.0163 - val_loss: 7.8720e-05 - val_mae: 0.0067 Epoch 62/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 5.4435e-04 - mae: 0.0152 - val_loss: 1.0905e-04 - val_mae: 0.0083 Epoch 63/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.9498e-04 - mae: 0.0137 - val_loss: 6.7024e-05 - val_mae: 0.0064 Epoch 64/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.1494e-04 - mae: 0.0150 - val_loss: 5.5036e-05 - val_mae: 0.0056 Epoch 65/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.2355e-04 - mae: 0.0141 - val_loss: 8.0716e-05 - val_mae: 0.0068 Epoch 66/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.1913e-04 - mae: 0.0158 - val_loss: 1.0333e-04 - val_mae: 0.0085 Epoch 67/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.3126e-04 - mae: 0.0132 - val_loss: 1.0796e-04 - val_mae: 0.0088 Epoch 68/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 6.5458e-04 - mae: 0.0154 - val_loss: 4.4240e-05 - val_mae: 0.0050 Epoch 69/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.0844e-04 - mae: 0.0134 - val_loss: 1.5036e-04 - val_mae: 0.0102 Epoch 70/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.6434e-04 - mae: 0.0162 - val_loss: 1.3520e-04 - val_mae: 0.0099 Epoch 71/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 6.9844e-04 - mae: 0.0166 - val_loss: 5.7778e-05 - val_mae: 0.0057 Epoch 72/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.7780e-04 - mae: 0.0134 - val_loss: 1.9925e-04 - val_mae: 0.0123 Epoch 73/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 6.4004e-04 - mae: 0.0162 - val_loss: 6.2128e-05 - val_mae: 0.0059 Epoch 74/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 6.6701e-04 - mae: 0.0146 - val_loss: 8.1227e-05 - val_mae: 0.0069 Epoch 75/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.8208e-04 - mae: 0.0129 - val_loss: 4.6064e-05 - val_mae: 0.0050 Epoch 76/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 7.0239e-04 - mae: 0.0149 - val_loss: 1.6548e-04 - val_mae: 0.0118 Epoch 77/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.8861e-04 - mae: 0.0168 - val_loss: 5.9412e-05 - val_mae: 0.0061 Epoch 78/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.0800e-04 - mae: 0.0134 - val_loss: 1.1483e-04 - val_mae: 0.0092 Epoch 79/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.0659e-04 - mae: 0.0168 - val_loss: 1.4421e-04 - val_mae: 0.0105 Epoch 80/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.1082e-04 - mae: 0.0170 - val_loss: 8.2453e-05 - val_mae: 0.0069 Epoch 81/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 5.6185e-04 - mae: 0.0146 - val_loss: 5.8532e-05 - val_mae: 0.0056 Epoch 82/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.6300e-04 - mae: 0.0137 - val_loss: 8.5023e-05 - val_mae: 0.0074 Epoch 83/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.4803e-04 - mae: 0.0137 - val_loss: 3.7564e-05 - val_mae: 0.0046 Epoch 84/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.2892e-04 - mae: 0.0123 - val_loss: 7.5828e-05 - val_mae: 0.0067 Epoch 85/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.6893e-04 - mae: 0.0137 - val_loss: 3.9523e-05 - val_mae: 0.0046 Epoch 86/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 6.2697e-04 - mae: 0.0134 - val_loss: 3.7481e-05 - val_mae: 0.0044 Epoch 87/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.8752e-04 - mae: 0.0135 - val_loss: 3.8978e-05 - val_mae: 0.0045 Epoch 88/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 6.8066e-04 - mae: 0.0162 - val_loss: 6.0206e-05 - val_mae: 0.0061 Epoch 89/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 44ms/step - loss: 5.9555e-04 - mae: 0.0149 - val_loss: 4.7710e-05 - val_mae: 0.0055 Epoch 90/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.4161e-04 - mae: 0.0136 - val_loss: 4.0607e-05 - val_mae: 0.0048 Epoch 91/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 6.3063e-04 - mae: 0.0140 - val_loss: 6.6456e-05 - val_mae: 0.0067 Epoch 92/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 5.5672e-04 - mae: 0.0151 - val_loss: 4.3665e-05 - val_mae: 0.0048 Epoch 93/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.3264e-04 - mae: 0.0140 - val_loss: 4.9607e-05 - val_mae: 0.0052 Epoch 94/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.9061e-04 - mae: 0.0139 - val_loss: 8.1925e-05 - val_mae: 0.0073 Epoch 95/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 3.8341e-04 - mae: 0.0136 - val_loss: 3.7875e-05 - val_mae: 0.0045 Epoch 96/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.4231e-04 - mae: 0.0142 - val_loss: 4.4060e-05 - val_mae: 0.0050 Epoch 97/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 6.3886e-04 - mae: 0.0146 - val_loss: 5.9731e-05 - val_mae: 0.0059 Epoch 98/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 41ms/step - loss: 5.0787e-04 - mae: 0.0131 - val_loss: 1.7641e-04 - val_mae: 0.0117 Epoch 99/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.0109e-04 - mae: 0.0138 - val_loss: 1.6802e-04 - val_mae: 0.0115 Epoch 100/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 4.7143e-04 - mae: 0.0144 - val_loss: 4.0301e-05 - val_mae: 0.0047 Epoch 101/200 15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 5.7215e-04 - mae: 0.0140 - val_loss: 1.0663e-04 - val_mae: 0.0082 4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 130ms/step ============================================================ EMD-GRU MODEL TRAINING SUMMARY ============================================================ Final epochs trained: 101 Best validation loss: 0.0000 Best validation MAE: 0.0044 Lookback period: 52 weeks Number of IMFs used: 5 GRU Model Architecture:
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ gru (GRU) │ (None, 160) │ 78,240 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_1 (Dropout) │ (None, 160) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 1) │ 161 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 235,205 (918.77 KB)
Trainable params: 78,401 (306.25 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 156,804 (612.52 KB)
============================================================ EVALUATION ON RECONSTRUCTED DATA ============================================================ MSE: 2017.1742 RMSE: 44.9130 MAE: 31.7065 MAPE: 0.02% R²: 0.9881 Directional Accuracy: 91.09% ============================================================ EVALUATION ON ORIGINAL DATA ============================================================ MSE: 26172.9512 RMSE: 161.7806 MAE: 114.4259 MAPE: 0.07% R²: 0.8542 Directional Accuracy: 31.68%
EMD-GRU Residual Analysis: Residual mean: 22.6542 Residual std: 160.1866 Residual min: -525.7028 Residual max: 642.2941 ================================================== FUTURE FORECAST - EMD-GRU MODEL (NEXT 12 WEEKS) ================================================== 2024-11-03: 2118.15 2024-11-10: 2047.91 2024-11-17: 1985.80 2024-11-24: 1945.99 2024-12-01: 1918.58 2024-12-08: 1894.45 2024-12-15: 1873.39 2024-12-22: 1856.15 2024-12-29: 1840.26 2025-01-05: 1823.07 2025-01-12: 1804.18 2025-01-19: 1784.49
============================================================ EMD-GRU MODEL ADVANTAGES ============================================================ 1. Adaptive Decomposition: EMD adapts to data characteristics 2. Multi-scale Analysis: Captures patterns at different time scales 3. Noise Reduction: Removes high-frequency noise effectively 4. GRU Efficiency: Faster training than LSTM with similar performance 5. Interpretability: IMFs provide insight into data components 6. Non-linear Handling: Effective for non-stationary, non-linear data 7. Data-driven: No predefined basis functions needed
In [48]:
# --- Step 11: Detailed Model Configuration Report & JSON Export (EMD-GRU) ---
import json
import datetime
import platform
from tensorflow.keras import backend as K
print("\n" + "="*60)
print("EMD + GRU MODEL CONFIGURATION & TRAINING REPORT")
print("="*60)
report = {}
# Optimizer details
try:
opt = final_model.optimizer
opt_name = opt.__class__.__name__
try:
lr_val = float(K.get_value(opt.learning_rate))
except Exception:
opt_cfg = opt.get_config()
lr_val = opt_cfg.get('learning_rate', None)
try:
lr_val = float(lr_val)
except Exception:
pass
report['optimizer'] = opt_name
report['learning_rate'] = lr_val
print(f"Optimizer: {opt_name}")
print(f"Learning Rate: {lr_val}")
except Exception as e:
report['optimizer'] = str(e)
print(f"Could not fetch optimizer details: {e}")
# Hyperparameters
try:
report['best_hyperparameters'] = best_hp.values
print("\nBest Hyperparameters (from tuner):")
for k, v in best_hp.values.items():
print(f" {k}: {v}")
except Exception as e:
report['best_hyperparameters'] = None
print(f"No best hyperparameters found: {e}")
# Model layers
layers_report = []
print("\nModel Layers:")
for i, layer in enumerate(final_model.layers):
layer_info = {
'index': i+1,
'class_name': layer.__class__.__name__,
'name': layer.name
}
if hasattr(layer, 'units'):
layer_info['units'] = getattr(layer, 'units', None)
print(f" Layer {i+1}: {layer.__class__.__name__} - units: {layer_info['units']}")
else:
print(f" Layer {i+1}: {layer.__class__.__name__}")
if hasattr(layer, 'activation'):
try:
layer_info['activation'] = layer.activation.__name__
except Exception:
layer_info['activation'] = str(layer.activation)
if hasattr(layer, 'rate'):
layer_info['dropout_rate'] = getattr(layer, 'rate', None)
if hasattr(layer, 'return_sequences'):
layer_info['return_sequences'] = getattr(layer, 'return_sequences', None)
try:
layer_info['input_shape'] = layer.input_shape
layer_info['output_shape'] = layer.output_shape
except:
layer_info['input_shape'] = None
layer_info['output_shape'] = None
layers_report.append(layer_info)
report['layers'] = layers_report
# Training summary
training_summary = {
'lookback': lookback,
'epochs_trained': len(history.history['loss']),
'final_training_loss': float(history.history['loss'][-1]),
'final_validation_loss': float(min(history.history['val_loss'])),
'final_training_mae': float(history.history['mae'][-1]) if 'mae' in history.history else None,
'final_validation_mae': float(min(history.history['val_mae'])) if 'val_mae' in history.history else None,
'num_imfs_used': len(imfs)
}
report['training_summary'] = training_summary
print("\nTraining Summary:")
for k, v in training_summary.items():
print(f" {k}: {v}")
# Evaluation
try:
report['evaluation_metrics'] = {
'reconstructed': metrics_reconstructed,
'original': metrics_original
}
print("\nEvaluation Metrics attached.")
except Exception as e:
report['evaluation_metrics'] = None
print(f"Could not attach evaluation metrics: {e}")
# Residual stats
try:
residuals_stats = {
'residual_mean': float(np.mean(residuals)),
'residual_std': float(np.std(residuals)),
'residual_min': float(np.min(residuals)),
'residual_max': float(np.max(residuals))
}
report['residuals'] = residuals_stats
print("\nResiduals Summary attached.")
except Exception as e:
print(f"Residual stats failed: {e}")
# Future forecast (if available)
try:
forecast_report = {
'dates': [str(d) for d in future_dates],
'forecasted_prices': [float(p) for p in future_prices]
}
report['future_forecast'] = forecast_report
print("\nFuture forecast added to report.")
except Exception as e:
report['future_forecast'] = None
print(f"Future forecast not added: {e}")
# Metadata
report['generated_at'] = datetime.datetime.now().isoformat()
report['python_version'] = platform.python_version()
report['tensorflow_version'] = tf.__version__
# Save JSON
report_filename = "emd_gru_report.json"
with open(report_filename, "w", encoding="utf-8") as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nSaved detailed report to: {report_filename}")
print("="*60)
print("REPORT COMPLETE")
print("="*60)
============================================================ EMD + GRU MODEL CONFIGURATION & TRAINING REPORT ============================================================ Optimizer: Adam Learning Rate: 0.0019911923445761204 Best Hyperparameters (from tuner): num_layers: 1 units_0: 160 dropout_0: 0.30000000000000004 dense_layers: 0 learning_rate: 0.0019911924591572727 units_1: 224 dropout_1: 0.1 units_2: 160 dropout_2: 0.1 dense_units_0: 96 dense_dropout_0: 0.5 dense_units_1: 80 dense_dropout_1: 0.4 Model Layers: Layer 1: GRU - units: 160 Layer 2: Dropout Layer 3: Dense - units: 1 Training Summary: lookback: 52 epochs_trained: 127 final_training_loss: 0.0006086781504563987 final_validation_loss: 3.647132689366117e-05 final_training_mae: 0.014383490197360516 final_validation_mae: 0.004545957315713167 num_imfs_used: 5 Evaluation Metrics attached. Residuals Summary attached. Future forecast added to report. Saved detailed report to: emd_gru_report.json ============================================================ REPORT COMPLETE ============================================================
In [31]:
plt.figure(figsize=(12, 6))
# Plot 3: Separate View - Actual vs Predicted
plt.plot(test_dates, y_actual_original, label='Actual (Original)', color='blue', linewidth=2)
plt.plot(test_dates, y_pred, label='Predicted (GRU)', color='red', linestyle='--', linewidth=2)
plt.title('Actual vs Predicted - EMD-GRU Model (Test Period)')
plt.xlabel('Date')
plt.ylabel('Price (Rs./kg)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_result221.png", dpi=300, bbox_inches='tight')
plt.show()
In [ ]:
In [ ]:
In [34]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
warnings.filterwarnings("ignore")
df = pd.read_excel(r"C:\Users\marti\Desktop\Data\price data\cardamom\Kerala\New folder - Copy - Copy\tho.xlsx", parse_dates=True)
np.random.seed(0)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df['Modal Price (Rs./kg)'] = df['Modal Price (Rs./Quintal)']/100
data = df['Modal Price (Rs./kg)'].values
# --- Step 1: EMD Decomposition ---
print("Performing EMD decomposition...")
from PyEMD import EMD
emd = EMD()
imfs = emd.emd(data, max_imf=5)
# Filter meaningful IMFs (remove low-variance components)
imfs = [imf for imf in imfs if np.var(imf) > 0.05*np.var(data)]
print(f"Selected {len(imfs)} meaningful IMFs")
# --- Step 2: Data Split ---
lookback = 0 # Not needed in SARIMA
total_size = len(data)
train_size = int(total_size*0.7)
val_size = int(total_size*0.15)
test_size = total_size - train_size - val_size
train_data = data[:train_size]
val_data = data[train_size:train_size+val_size]
test_data = data[train_size+val_size:]
start_test_idx = train_size+val_size
test_dates = df.index[start_test_idx:start_test_idx+test_size]
# --- Step 3: SARIMA Modeling for each IMF ---
param_grid = {
'order': [(2,1,0),(5,1,0),(3,0,2),(2,0,3)],
'seasonal_order': [(0,1,1,26), (1,1,1,26), (0,1,0,26),(2,0,1,26),(2,0,2,26),(2,0,0,26)]
}
from itertools import product
best_imf_models = {}
def train_sarima(imf, order, seasonal_order, train_size, val_size):
try:
model = SARIMAX(imf[:train_size],
order=order,
seasonal_order=seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False)
model_fit = model.fit(disp=False)
val_pred = model_fit.forecast(steps=val_size)
return mean_squared_error(imf[train_size:train_size+val_size], val_pred), model_fit
except:
return np.inf, None
for i, imf in enumerate(imfs, start=1):
print(f"\nTraining SARIMA for IMF {i} (Variance: {np.var(imf):.2f})")
scores_models = [
train_sarima(imf, order, seas_order, train_size, val_size)
for order, seas_order in product(param_grid['order'], param_grid['seasonal_order'])
]
scores, models = zip(*scores_models)
best_idx = np.argmin(scores)
best_imf_models[f'IMF_{i}'] = models[best_idx]
if models[best_idx]:
print(f"Best params: {models[best_idx].model.order}x{models[best_idx].model.seasonal_order} | MSE: {scores[best_idx]:.4f}")
# --- Step 4: Forecast and Reconstruction ---
test_predictions = np.zeros(test_size)
for imf_name, model in best_imf_models.items():
if model:
test_predictions += model.forecast(steps=test_size)
# --- Step 5: Evaluation ---
mse = mean_squared_error(test_data, test_predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(test_data, test_predictions)
mape = mean_absolute_percentage_error(test_data, test_predictions)
r2 = r2_score(test_data, test_predictions)
print("\nEMD-SARIMA Test Evaluation:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.2f}")
print(f"R²: {r2:.4f}")
# --- Step 6: Actual vs Predicted Plot ---
plt.figure(figsize=(12, 6))
plt.plot(test_dates, test_data, label='Actual', color='blue', linewidth=2)
plt.plot(test_dates, test_predictions, label='EMD-SARIMA Forecast', color='red', linestyle='--', linewidth=2)
plt.title("EMD-SARIMA: Actual vs Predicted Cardamom Prices (Test Set)")
plt.xlabel("Date")
plt.ylabel("Price (Rs./kg)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("C:/Users/marti/Desktop/png/egr_sarima_actual_vs_predicted.png", dpi=300, bbox_inches='tight')
plt.show()
Performing EMD decomposition... Selected 5 meaningful IMFs Training SARIMA for IMF 1 (Variance: 83003.95) Best params: (3, 0, 2)x(2, 0, 0, 26) | MSE: 2981.6178 Training SARIMA for IMF 2 (Variance: 35017.08) Best params: (2, 0, 3)x(2, 0, 2, 26) | MSE: 5921.9670 Training SARIMA for IMF 3 (Variance: 49802.40) Best params: (2, 1, 0)x(2, 0, 0, 26) | MSE: 14621.6606 Training SARIMA for IMF 4 (Variance: 99210.38) Best params: (3, 0, 2)x(1, 1, 1, 26) | MSE: 8451.3243 Training SARIMA for IMF 5 (Variance: 175117.02) Best params: (5, 1, 0)x(0, 1, 1, 26) | MSE: 0.3374 EMD-SARIMA Test Evaluation: MSE: 681121.5636 RMSE: 825.3009 MAE: 708.2112 MAPE: 0.43 R²: -2.4266
In [1]:
# Print the SARIMA model summary for each selected IMF
for imf_name, model in best_imf_models.items():
if model:
print(f"\n{imf_name} SARIMA Model Summary:")
print(model.summary())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[1], line 2 1 # Print the SARIMA model summary for each selected IMF ----> 2 for imf_name, model in best_imf_models.items(): 3 if model: 4 print(f"\n{imf_name} SARIMA Model Summary:") NameError: name 'best_imf_models' is not defined
In [ ]: